from fastapi import FastAPI, File, UploadFile, HTTPException, Form
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from huggingface_hub import snapshot_download
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import logging
from dotenv import load_dotenv
import base64
import io
import re

# Set up logging to track application behavior and debug issues
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load environment variables from a local .env file (a no-op on a Space, where secrets are
# injected directly into the environment), then read the Hugging Face API token
load_dotenv()
API_TOKEN = os.getenv("HF_TOKEN")
if not API_TOKEN:
    raise ValueError("HF_TOKEN environment variable not set. Set it in Space secrets.")

# Initialize FastAPI application
app = FastAPI()

# Enable CORS to allow frontend-backend communication from any origin
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount the 'static' directory to serve frontend assets (e.g., index.html, script.js, style.css)
app.mount("/static", StaticFiles(directory="static"), name="static")

# Configure the Hugging Face model for code generation
MODEL_NAME = "Qwen/Qwen2.5-Coder-1.5B-Instruct"
model_dir = "./qwen_model"

# Log the working directory and model path for debugging
logger.info(f"Current working directory: {os.getcwd()}")
logger.info(f"Model directory path: {os.path.abspath(model_dir)}")

# Download the model if it doesn't exist
try:
    os.makedirs(model_dir, exist_ok=True)
    if not os.listdir(model_dir):  # Only download if directory is empty
        logger.info(f"Downloading model {MODEL_NAME} to {model_dir}")
        snapshot_download(repo_id=MODEL_NAME, token=API_TOKEN, local_dir=model_dir)
        logger.info(f"Model downloaded. Directory contents: {os.listdir(model_dir)}")
    else:
        logger.info(f"Model directory {model_dir} already contains files: {os.listdir(model_dir)}")
except Exception as e:
    logger.error(f"Failed to download model: {str(e)}")
    raise ValueError(f"Model download failed: {str(e)}")

# Load the model and tokenizer for code generation
logger.info(f"Loading model from {model_dir}")
tokenizer = AutoTokenizer.from_pretrained(model_dir, token=API_TOKEN)
model = AutoModelForCausalLM.from_pretrained(model_dir, token=API_TOKEN)

# Set pad_token_id to eos_token_id to avoid tokenizer warnings
tokenizer.pad_token_id = tokenizer.eos_token_id

# Create a text generation pipeline using the loaded model
# device=-1 runs on CPU (suitable for a free Space); change to 0 to use a GPU
generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)

# Create directory for uploaded Excel files
UPLOAD_DIR = "uploads"
os.makedirs(UPLOAD_DIR, exist_ok=True)


# Endpoint to handle Excel file uploads
@app.post("/upload/")
async def upload_file(file: UploadFile = File(...)):
    # Ensure the uploaded file is an Excel file (.xlsx)
    if not file.filename.endswith(".xlsx"):
        raise HTTPException(status_code=400, detail="File must be an Excel file (.xlsx)")

    # Save the file to the uploads directory
    file_path = os.path.join(UPLOAD_DIR, file.filename)
    with open(file_path, "wb") as buffer:
        buffer.write(await file.read())

    logger.info(f"File uploaded: {file.filename}")
    return {"filename": file.filename}

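# Example client usage (a sketch for illustration only, not part of the app): the base URL,
# port, prompt text, and "data.xlsx" filename below are assumptions for a locally running
# instance; they exercise the /upload/ endpoint above and the /generate-visualization/
# endpoint defined next.
#
#   import requests
#   base = "http://localhost:7860"
#   with open("data.xlsx", "rb") as f:
#       requests.post(f"{base}/upload/", files={"file": ("data.xlsx", f)})
#   resp = requests.post(
#       f"{base}/generate-visualization/",
#       data={"prompt": "Plot total sales per region as a bar chart", "filename": "data.xlsx"},
#   )
#   print(resp.json().get("error") or resp.json()["generated_code"])
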
# Endpoint to generate a visualization based on a user prompt
@app.post("/generate-visualization/")
async def generate_visualization(prompt: str = Form(...), filename: str = Form(...)):
    # Check if the uploaded file exists
    file_path = os.path.join(UPLOAD_DIR, filename)
    if not os.path.exists(file_path):
        raise HTTPException(status_code=404, detail="File not found on server.")

    # Load the Excel file into a pandas DataFrame
    try:
        df = pd.read_excel(file_path)
        if df.empty:
            raise ValueError("Excel file is empty.")
        logger.info(f"DataFrame columns: {df.columns.tolist()}")
        logger.info(f"DataFrame preview:\n{df.head().to_string()}")
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Error reading Excel file: {str(e)}")

    # Create a prompt for the model, specifying the DataFrame and visualization requirements
    input_text = f"""
Given the DataFrame 'df' with columns {', '.join(df.columns)} and preview:
{df.head().to_string()}
Write Python code to: {prompt}
- Load the data ONLY with 'df = pd.read_excel("{filename}")' (no other data loading such as pd.read_csv, and do not create a new DataFrame).
- Use pandas (pd), matplotlib.pyplot (plt), or seaborn (sns).
- Include axis labels and a title.
- Output ONLY executable Python code. Do NOT include triple quotes, prose, Markdown, or text like 'Hint', 'Solution', or 'Here is the code'.
"""

    # Generate code using the model
    try:
        generated = generator(input_text, max_new_tokens=500, num_return_sequences=1)
        generated_code = generated[0]["generated_text"].replace(input_text, "").strip()
        logger.info(f"Generated code:\n{generated_code}")
    except Exception as e:
        logger.error(f"Error querying model: {str(e)}")
        return {
            "plot_base64": None,
            "generated_code": "",
            "error": f"Error querying model: {str(e)}"
        }

    # Handle empty generated code
    if not generated_code.strip():
        return {
            "plot_base64": None,
            "generated_code": "",
            "error": "No code generated by the AI model."
        }

    # Extract the code block between ```python and ```, strictly requiring a valid code block
    code_block_pattern = r"```python\n(.*?)\n```"
    matches = list(re.finditer(code_block_pattern, generated_code, re.DOTALL))
    if matches:
        # Take the first code block for execution and display
        raw_code_block = matches[0].group(1).strip()  # Raw code for display
        executable_code = raw_code_block  # Will be cleaned for execution
        logger.info(f"Raw code block:\n{raw_code_block}")
    else:
        logger.error("No valid Python code block found in generated output.")
        return {
            "plot_base64": None,
            "generated_code": generated_code,
            "error": "No valid Python code block found in generated output."
        }

    # Clean the code for execution: drop comments, empty lines, disallowed data loading,
    # plt.show(), and df redefinition (indentation is preserved so nested blocks stay valid)
    executable_code = "\n".join(
        line for line in executable_code.splitlines()
        if line.strip()
        and not line.strip().startswith('#')
        and not any(kw in line for kw in ["pd.read_csv", "pd.read_excel", "plt.show", "df ="])
    ).strip()

    # Clean the raw code block for display: drop comments and empty lines, but keep
    # other lines such as pd.read_excel and plt.show()
    display_code = "\n".join(
        line for line in raw_code_block.splitlines()
        if line.strip() and not line.strip().startswith('#')
    ).strip()
    logger.info(f"Display code (comments removed):\n{display_code}")

    # Handle empty code after cleaning for execution
    if not executable_code:
        logger.error("No valid executable code after cleaning.")
        return {
            "plot_base64": None,
            "generated_code": display_code,
            "error": "Generated code was invalid (e.g., included data loading, df redefinition, or was empty)."
        }

    logger.info(f"Executable code:\n{executable_code}")

    # Execute the code and generate the plot
    try:
        exec_globals = {"pd": pd, "plt": plt, "sns": sns, "df": df}
        exec(executable_code, exec_globals)

        # Save the plot to a BytesIO buffer (no disk storage)
        buffer = io.BytesIO()
        plt.savefig(buffer, format="png", bbox_inches="tight")
        plt.close()
        buffer.seek(0)

        # Encode the plot as base64 for frontend display
        plot_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
    except Exception as e:
        logger.error(f"Error executing code:\n{executable_code}\nException: {str(e)}")
        return {
            "plot_base64": None,
            "generated_code": display_code,
            "error": f"Error executing code: {str(e)}"
        }

    # Return the plot, display code (without comments), and any error message
    return {
        "plot_base64": plot_base64,
        "generated_code": display_code,
        "error": None
    }


# Serve the frontend HTML
@app.get("/")
async def serve_frontend():
    with open("static/index.html", "r") as f:
        return HTMLResponse(content=f.read())
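

# Optional local entry point (a sketch: on Hugging Face Spaces the server is normally started
# by the Space's own uvicorn command, so this block only matters when running the file
# directly, e.g. `python app.py`, assuming this file is named app.py).
if __name__ == "__main__":
    import uvicorn

    # 7860 is the port Hugging Face Spaces expects the app to listen on; adjust as needed.
    uvicorn.run(app, host="0.0.0.0", port=7860)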