import torch
import numpy as np
import pandas as pd
from pathlib import Path
import os
import traceback

# Add HuggingFace Hub imports
from huggingface_hub import hf_hub_download

# Import your model components
from models.model import RNAStructurePredictor
from scripts.description_encoder import encode_description
import scripts.rna_utils_optimized as ru

# Constants
WINDOW, OVERLAP, N_STRUCT = 256, 32, 5
SCALE = 10.0

class RNAFoldingPredictor:
    def __init__(self, model_path, device=None):
        self.device = device or torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")
        
        # Load model
        self._load_model(model_path)
        
        # Initialize components
        try:
            print("Initializing torsion model...")
            self.torsion_model = ru.RNATorsionBERTforTorchCached()
            print("Torsion model initialized successfully")
        except Exception as e:
            print(f"Error initializing torsion model: {e}")
            print(traceback.format_exc())
            self.torsion_model = None
            
        try:
            print("Initializing description encoder...")
            self.desc_encoder = encode_description(device=self.device)
            print("Description encoder initialized successfully")
        except Exception as e:
            print(f"Error initializing description encoder: {e}")
            print(traceback.format_exc())
            self.desc_encoder = None
            
        self.desc_cache = {}

    def _load_model(self, model_path):
        try:
            print(f"\n==== MODEL LOADING DIAGNOSTICS ====")
            print(f"Attempting to load model from: {model_path}")
            
            # Check if the path is a local file or a Hugging Face model ID
            is_huggingface_path = not os.path.exists(model_path) and '/' in model_path
            
            if is_huggingface_path:
                print(f"Loading from Hugging Face repository: {model_path}")
                
                try:
                    # Try downloading the main model file
                    print("Attempting to download model file...")
                    model_file = hf_hub_download(repo_id=model_path, filename="best_val_model.pt")
                    print(f"Downloaded model file: {model_file}")
                    ckpt = torch.load(model_file, map_location=self.device)
                except Exception as e:
                    print(f"Failed to download model file directly: {e}")
                    print("Trying alternative filenames...")
                    
                    # Try downloading pytorch_model.bin
                    try:
                        model_file = hf_hub_download(repo_id=model_path, filename="pytorch_model.bin")
                        print(f"Downloaded model file: {model_file}")
                        ckpt = torch.load(model_file, map_location=self.device)
                    except Exception as e2:
                        print(f"Failed to download pytorch_model.bin: {e2}")
                        
                        # As a last resort, check if there's a sharded model
                        try:
                            # Try to download the model index
                            print("Attempting to download model index...")
                            index_path = hf_hub_download(repo_id=model_path, filename="pytorch_model.bin.index.json")
                            model_dir = os.path.dirname(index_path)
                            print(f"Downloaded model index to {model_dir}")
                            
                            # Create model instance
                            print("Creating model instance...")
                            self.model = RNAStructurePredictor().to(self.device)
                            
                            # Load sharded checkpoint
                            print(f"Loading sharded checkpoint from {model_dir}...")
                            from transformers.modeling_utils import load_sharded_checkpoint
                            load_sharded_checkpoint(self.model, model_dir)
                            self.model.eval()
                            print("Sharded checkpoint loaded successfully")
                            return True
                        except Exception as e3:
                            print(f"Failed to load sharded model: {e3}")
                            print("All loading methods failed. Using dummy model for testing.")
                            self.model = None
                            return False
                
                # If we got here, we successfully loaded a checkpoint
                print(f"Checkpoint loaded successfully. Type: {type(ckpt)}")
                
                # Extract state_dict and model_args
                if isinstance(ckpt, dict) and "state_dict" in ckpt:
                    state_dict = ckpt["state_dict"]
                    model_args = ckpt.get("model_args", {})
                else:
                    state_dict = ckpt
                    model_args = {}
                
                print(f"State dict extracted. Contains {len(state_dict)} keys.")
                
                # Create model instance
                print("Creating model instance...")
                self.model = RNAStructurePredictor(**model_args).to(self.device)
                print(f"Model created. Architecture:\n{self.model}")
                
                # Load state dict
                print("Loading state dict into model...")
                self.model.load_state_dict(state_dict, strict=False)
                
                self.model.eval()
                print("Model set to evaluation mode")
                print(f"Model loaded successfully from HuggingFace repository")
                return True
            
            else:
                # Original code for loading from local path
                print(f"File exists: {os.path.exists(model_path)}")
                if os.path.exists(model_path):
                    print(f"File size: {os.path.getsize(model_path)} bytes")
                
                print("Loading checkpoint...")
                ckpt = torch.load(model_path, map_location=self.device)
                print(f"Checkpoint loaded successfully. Type: {type(ckpt)}")
                
                # Extract state_dict and model_args
                if isinstance(ckpt, dict):
                    print(f"Checkpoint keys: {list(ckpt.keys())}")
                    if "state_dict" in ckpt:
                        state_dict = ckpt["state_dict"]
                        print(f"Number of keys in state_dict: {len(state_dict)}")
                        print(f"First 5 keys in state_dict: {list(state_dict.keys())[:5]}")
                    else:
                        state_dict = ckpt
                    model_args = ckpt.get("model_args", {})
                else:
                    print(f"Checkpoint is not a dict, type: {type(ckpt)}")
                    if isinstance(ckpt, torch.nn.Module):
                        print("Checkpoint appears to be a PyTorch model directly")
                        self.model = ckpt.to(self.device)
                        self.model.eval()
                        print("Model set to evaluation mode")
                        print(f"Model loaded successfully from {model_path}")
                        print("==== END MODEL LOADING DIAGNOSTICS ====\n")
                        return True
                    elif hasattr(ckpt, 'keys'):
                        print(f"Checkpoint has keys: {list(ckpt.keys())[:10]}")
                    state_dict = ckpt
                    model_args = {}
                
                print(f"State dict extracted. Type: {type(state_dict)}")
                print(f"Model args: {model_args}")
                
                # Create model instance
                print("Creating model instance...")
                self.model = RNAStructurePredictor(**model_args).to(self.device)
                print(f"Model created. Architecture:\n{self.model}")
                
                # Compare keys before loading
                model_keys = set(self.model.state_dict().keys())
                loaded_keys = set(state_dict.keys())
                
                # Check for missing or extra keys
                missing_keys = model_keys - loaded_keys
                extra_keys = loaded_keys - model_keys
                
                print(f"Model has {len(model_keys)} parameters, checkpoint has {len(loaded_keys)} parameters")
                if missing_keys:
                    print(f"Warning: {len(missing_keys)} keys are missing in checkpoint")
                    print(f"First 5 missing keys: {list(missing_keys)[:5]}")
                if extra_keys:
                    print(f"Warning: {len(extra_keys)} extra keys in checkpoint not used by model")
                    print(f"First 5 extra keys: {list(extra_keys)[:5]}")
                    
                # Check if the checkpoint keys need a prefix adjustment
                if len(missing_keys) > 0 and len(extra_keys) > 0:
                    print("Checking if keys need prefix adjustment...")
                    # Example: if checkpoint has 'module.layer.weight' but model expects 'layer.weight'
                    sample_extra = list(extra_keys)[0]
                    sample_missing = list(missing_keys)[0]
                    if '.' in sample_extra:
                        prefix = sample_extra.split('.')[0] + '.'
                        if sample_missing in sample_extra.replace(prefix, ''):
                            print(f"Found prefix mismatch. Checkpoint has prefix '{prefix}'")
                            # Create new state dict with adjusted keys
                            new_state_dict = {}
                            for k, v in state_dict.items():
                                if k.startswith(prefix):
                                    new_state_dict[k.replace(prefix, '')] = v
                                else:
                                    new_state_dict[k] = v
                            state_dict = new_state_dict
                            print(f"Adjusted state dict, now has {len(state_dict)} keys")
                
                # Load state dict
                print("Loading state dict into model...")
                self.model.load_state_dict(state_dict, strict=False)
                
                self.model.eval()
                print("Model set to evaluation mode")
                print(f"Model loaded successfully from {model_path}")
                print("==== END MODEL LOADING DIAGNOSTICS ====\n")
                return True
                
        except Exception as e:
            print(f"Error loading model: {e}")
            print(f"Detailed traceback:")
            print(traceback.format_exc())
            # Create a dummy model for testing
            self.model = None
            print("Using dummy model for testing due to error")
            return False

    def _encode_sequence(self, seq):
        base_to_idx = {"A": 0, "C": 1, "G": 2, "U": 3}
        idx = [base_to_idx.get(b.upper(), 4) for b in seq]
        return torch.tensor(idx, dtype=torch.long)

    def _sliding_windows(self, length, window=WINDOW, overlap=OVERLAP):
        step = window - overlap
        for s in range(0, length, step):
            e = min(s + window, length)
            yield s, e
            if e == length:
                break

    def _get_description_embedding(self, desc):
        if self.desc_encoder is None:
            return torch.zeros(768, dtype=torch.float32, device=self.device)
            
        if desc not in self.desc_cache:
            try:
                vec = (
                    self.desc_encoder.encode(desc).clone().detach()
                    if desc else torch.zeros(768)
                ).to(dtype=torch.float32, device=self.device)
                self.desc_cache[desc] = vec
            except Exception as e:
                print(f"Error encoding description: {e}")
                print(traceback.format_exc())
                vec = torch.zeros(768, dtype=torch.float32, device=self.device)
                self.desc_cache[desc] = vec
        return self.desc_cache[desc]

    def _predict_structure(self, seq, desc_vec):
        """Predict 3D structure for a single sequence"""
        if self.model is None:
            print("Model is None, using dummy prediction")
            return self._generate_dummy_prediction(seq)
            
        L = len(seq)
        print(f"Predicting structure for sequence of length {L}")
        acc = torch.zeros(N_STRUCT, L, 3, device=self.device)
        cnts = torch.zeros(L, device=self.device)

        for s, e in self._sliding_windows(L):
            win_seq = seq[s:e]
            Lw = len(win_seq)
            print(f"Processing window {s}:{e}, length {Lw}")

            # sequence tensor
            seq_tensor = self._encode_sequence(win_seq).unsqueeze(0).to(self.device)

            # torsion: pad zeros at first / last base
            if self.torsion_model is None:
                print("Torsion model is None, using random values")
                tor_raw = torch.rand(Lw-2, 9)
            else:
                try:
                    tor_raw = self.torsion_model.predict(win_seq)
                    print(f"Torsion prediction shape: {tor_raw.shape}")
                except Exception as e:
                    print(f"Error predicting torsion: {e}")
                    print(traceback.format_exc())
                    tor_raw = torch.rand(Lw-2, 9)
                    
            torsion = torch.zeros(Lw, 9, dtype=torch.float32, device=self.device)
            if Lw > 2:
                torsion[1:-1] = tor_raw.to(dtype=torch.float32)

            # BPPM from RNAfold
            try:
                print("Generating BPPM...")
                bppm = torch.tensor(
                    ru.get_bppm_from_sequence_cached(win_seq),
                    dtype=torch.float32,
                    device=self.device
                )
                print(f"BPPM shape: {bppm.shape}")
            except Exception as e:
                print(f"BPPM generation failed: {e}. Using random matrix.")
                print(traceback.format_exc())
                bppm = torch.rand(Lw, Lw, device=self.device)

            # forward pass
            try:
                print("Running model forward pass...")
                with torch.no_grad():
                    out = self.model(
                        seq=seq_tensor,
                        bppm=bppm.unsqueeze(0),
                        torsion=torsion.unsqueeze(0),
                        description_emb=desc_vec.unsqueeze(0),
                    )
                print(f"Model output shape: {out.shape}")
                
                acc[:, s:e, :] += out.squeeze(0)
                cnts[s:e] += 1
                print(f"Updated accumulator for positions {s}:{e}")
            except Exception as e:
                print(f"Error in model forward pass: {e}")
                print(traceback.format_exc())
                print("Using dummy data for this window")
                dummy_coords = self._generate_dummy_prediction(win_seq)
                acc[:, s:e, :] += torch.tensor(dummy_coords, device=self.device)
                cnts[s:e] += 1

        acc /= cnts.clamp(min=1.0)[None, :, None]
        print(f"Final prediction shape: {acc.shape}")
        return acc.cpu().numpy() * SCALE

    def _generate_dummy_prediction(self, seq):
        """Generate dummy prediction data for testing"""
        L = len(seq)
        coords = np.zeros((N_STRUCT, L, 3))
        
        # Generate a simple helix shape
        for n in range(N_STRUCT):
            for i in range(L):
                t = i * 0.5 + n * 0.2  # Parameter along helix
                coords[n, i, 0] = 10 * np.cos(t) + np.random.normal(0, 0.5)
                coords[n, i, 1] = 10 * np.sin(t) + np.random.normal(0, 0.5)
                coords[n, i, 2] = 2 * i + np.random.normal(0, 0.3)
                
        print(f"Generated dummy prediction with shape {coords.shape}")
        return coords

    def predict(self, seq, desc=""):
        """Public method to predict structure and format as DataFrame"""
        # Validate sequence
        seq = seq.upper()
        if not all(b in "ACGU" for b in seq):
            raise ValueError("Invalid RNA sequence. Only A, C, G, and U bases allowed.")
        
        print(f"Predicting structure for sequence: {seq[:20]}{'...' if len(seq) > 20 else ''}")    
        # Get description embedding
        desc_vec = self._get_description_embedding(desc)
        
        # Predict 3D coordinates
        coords = self._predict_structure(seq, desc_vec)  # (5,L,3)
        
        # Format as DataFrame
        records = []
        for i, base in enumerate(seq, start=1):
            flat = coords[:, i-1, :].reshape(-1).tolist()
            records.append([f"pred_{i}", base, i] + flat)
            
        # Create DataFrame
        header = ["ID", "resname", "resid"]
        for n in range(1, N_STRUCT + 1):
            header += [f"x_{n}", f"y_{n}", f"z_{n}"]
        
        result_df = pd.DataFrame.from_records(records, columns=header)
        print(f"Created DataFrame with shape {result_df.shape}")
        return result_df