import os

import gradio as gr
import spaces
import torch
from huggingface_hub import login
from PIL import Image
from transformers import (
    AutoModelForCausalLM,
    AutoModelForImageTextToText,
    AutoProcessor,
    AutoTokenizer,
)


# Collect the PIL images referenced in a chat-style message list
def process_vision_info(messages: list[dict]) -> list[Image.Image]:
    image_inputs = []
    for msg in messages:
        content = msg.get("content", [])
        if not isinstance(content, list):
            content = [content]
        for element in content:
            if isinstance(element, dict) and ("image" in element or element.get("type") == "image"):
                image = element["image"] if "image" in element else element
                image_inputs.append(image.convert("RGB"))
    return image_inputs


# Load the image model and processor on CPU (ZeroGPU moves them to GPU per call)
def load_image_model():
    model_name = "JoannaKOKO/Gemma3-4b_tarot"
    model = AutoModelForImageTextToText.from_pretrained(
        model_name,
        device_map="cpu",
        torch_dtype=torch.bfloat16,
        attn_implementation="eager",
    )
    processor = AutoProcessor.from_pretrained(model_name)
    return processor, model


# Load the text model on CPU
def load_text_model():
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B-Instruct")
    model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-3B-Instruct")
    return model, tokenizer


# Generate a card description with ZeroGPU
@spaces.GPU
def generate_description(sample, model, processor):
    # Ensure the model is on GPU
    model.to("cuda")
    system_message = "You are a Tarot Card Identifier providing the card names and whether they are in upright or reversed position."
    messages = [
        {"role": "system", "content": [{"type": "text", "text": system_message}]},
        {"role": "user", "content": [
            {"type": "image", "image": sample["image"]},
            {"type": "text", "text": sample["prompt"]},
        ]},
    ]
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs = process_vision_info(messages)
    inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to("cuda")
    # Stop on EOS or on Gemma's end-of-turn marker
    stop_token_ids = [
        processor.tokenizer.eos_token_id,
        processor.tokenizer.convert_tokens_to_ids("<end_of_turn>"),
    ]
    generated_ids = model.generate(
        **inputs,
        max_new_tokens=256,
        top_p=1.0,
        do_sample=True,
        temperature=0.8,
        eos_token_id=stop_token_ids,
        disable_compile=True,
    )
    # Strip the prompt tokens from each generated sequence
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    # Move trimmed IDs to CPU before decoding
    generated_ids_trimmed_cpu = [ids.cpu() for ids in generated_ids_trimmed]
    output_text = processor.batch_decode(
        generated_ids_trimmed_cpu, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )
    return output_text[0]


# Generate the tarot interpretation with ZeroGPU
@spaces.GPU
def generate_interpretation(question, cards, model, tokenizer):
    # Ensure the model is on GPU
    model.to("cuda")
    prompt = f"""Analyze this tarot reading for the question: {question}

Cards:
1. Reason: {cards[0]}
2. Result: {cards[1]}
3. Recommendation: {cards[2]}

Provide a professional interpretation covering:
- Individual card meanings in their positions
- Combined message and symbolism
- Practical advice
- Potential outcomes"""
    messages = [
        {"role": "system", "content": "You are a Tarot Card Explainer providing relevant suggestions based on the tarot card names."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to("cuda")
    generated_ids = model.generate(**model_inputs, max_new_tokens=512)
    # Strip the prompt tokens before decoding (batch_decode handles the CPU move)
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response
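
# Hypothetical smoke test for the two generators above, outside Gradio
# (a sketch, assuming a local "card.jpg" and an available CUDA device; the
# @spaces.GPU decorator only allocates hardware when running inside a Space):
#
#   processor, model = load_image_model()
#   card = generate_description(
#       {"prompt": "Name the tarot card in this image.", "image": Image.open("card.jpg")},
#       model,
#       processor,
#   )
#   text_model, tokenizer = load_text_model()
#   reading = generate_interpretation(
#       "What should I focus on?", [card, card, card], text_model, tokenizer
#   )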

def main():
    """
    Set up and launch the Gradio tarot reading application.

    Handles authentication, model loading, and interface creation.
    """
    # Authenticate with Hugging Face
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        raise ValueError("HF_TOKEN not found in environment variables!")
    login(token=hf_token)

    # Load models on CPU
    image_processor, image_model = load_image_model()
    text_model, text_tokenizer = load_text_model()

    # Define the tarot processing function
    def process_tarot(question, reason_img, result_img, recommendation_img):
        """
        Process the user's question and tarot card images to generate a reading.

        Uses the loaded models for card identification and interpretation.
        """
        try:
            # Validate image uploads
            if any(img is None for img in [reason_img, result_img, recommendation_img]):
                return "Please upload all three cards!"

            # Generate a description for each card using the GPU
            cards = []
            for img in [reason_img, result_img, recommendation_img]:
                sample = {
                    "prompt": "Please tell me the name of the tarot card in this image, and specify 'reversed' if it is reversed.",
                    "image": img.convert("RGB"),
                }
                card = generate_description(sample, image_model, image_processor)
                cards.append(card)

            # Generate the full interpretation using the GPU
            interpretation = generate_interpretation(question, cards, text_model, text_tokenizer)

            # Format the output
            card_cat = ["Reason Card", "Result Card", "Recommendation Card"]
            output = "### Card Analysis\n"
            for cat, card in zip(card_cat, cards):
                output += f"**{cat}:** {card}\n\n"
            output += "### Full Interpretation\n"
            output += interpretation
            output += "\n\n**Reading Ends.**"
            return output
        except Exception as e:
            return f"Error in reading: {str(e)}"

    # Set up the Gradio interface
    with gr.Blocks() as demo:
        gr.Markdown("# 🔮 Arcane Intelligence (A.I.)")
        gr.Markdown("### An Artificial-Intelligence-Supported Tarot Reading Application")
        question = gr.Textbox(
            label="Your Question",
            placeholder="Enter your question for the cards...",
            lines=3,
        )
        with gr.Row():
            reason_img = gr.Image(label="Reason Card", type="pil")
            result_img = gr.Image(label="Result Card", type="pil")
            recommendation_img = gr.Image(label="Recommendation Card", type="pil")
        submit_btn = gr.Button("Perform Reading")
        output = gr.Markdown()

        # Connect the button to the processing function
        submit_btn.click(
            fn=lambda: "Reading in progress...",  # Show a progress message first
            inputs=None,
            outputs=output,
        ).then(
            fn=process_tarot,  # Run the tarot reading
            inputs=[question, reason_img, result_img, recommendation_img],  # Pass all inputs
            outputs=output,  # Update the same output with the result
        )

    # Launch the application
    demo.launch()


# Entry point of the script
if __name__ == "__main__":
    main()