import os

import torch
import spaces
import gradio as gr
from huggingface_hub import login
from PIL import Image
from transformers import (
    AutoModelForCausalLM,
    AutoModelForImageTextToText,
    AutoProcessor,
    AutoTokenizer,
)

# Function to process vision information
def process_vision_info(messages: list[dict]) -> list[Image.Image]:
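    """Collect the PIL images referenced in a chat-style messages list.

    Expects entries shaped like
    {"role": "user", "content": [{"type": "image", "image": <PIL.Image>}, ...]}
    and returns the images converted to RGB, in order of appearance.
    """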
    image_inputs = []
    for msg in messages:
        content = msg.get("content", [])
        if not isinstance(content, list):
            content = [content]
        for element in content:
            if isinstance(element, dict) and ("image" in element or element.get("type") == "image"):
                image = element.get("image", element)
                # Guard against entries that declare an image but carry no PIL object
                if isinstance(image, Image.Image):
                    image_inputs.append(image.convert("RGB"))
    return image_inputs

# Load image model and processor on CPU
def load_image_model():
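    """Load the fine-tuned Gemma 3 card-identification model and its processor on the CPU."""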
    model_name = "JoannaKOKO/Gemma3-4b_tarot"
    model = AutoModelForImageTextToText.from_pretrained(
        model_name,
        device_map="cpu",
        torch_dtype=torch.bfloat16,
        attn_implementation="eager",
    )
    processor = AutoProcessor.from_pretrained(model_name)
    return processor, model

# Load text model on CPU
def load_text_model():
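    """Load the Qwen2.5-3B-Instruct interpretation model and its tokenizer on the CPU."""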
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B-Instruct")
    model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-3B-Instruct")
    return model, tokenizer

# Generate card description with ZeroGPU
@spaces.GPU
def generate_description(sample, model, processor):
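    """Identify the tarot card in sample["image"].

    `sample` is a dict with a PIL "image" and a text "prompt"; the call runs on
    GPU via ZeroGPU and returns the decoded card name (and orientation) as text.
    """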
    # Ensure the model is on GPU
    model.to('cuda')
    system_message = 'You are a Tarot Card Identifier providing the card names and whether they are in upright or reversed position.'
    messages = [
        {"role": "system", "content": [{"type": "text", "text": system_message}]},
        {"role": "user", "content": [
            {"type": "image", "image": sample["image"]},
            {"type": "text", "text": sample["prompt"]},
        ]},
    ]
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs = process_vision_info(messages)
    inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to("cuda")
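    # Stop at either the tokenizer's EOS token or Gemma's <end_of_turn> marker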
    stop_token_ids = [processor.tokenizer.eos_token_id, processor.tokenizer.convert_tokens_to_ids("<end_of_turn>")]
    generated_ids = model.generate(
        **inputs,
        max_new_tokens=256,
        top_p=1.0,
        do_sample=True,
        temperature=0.8,
        eos_token_id=stop_token_ids,
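        # Skip torch.compile during decoding; compiled generation can be
        # problematic on short-lived ZeroGPU workers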
        disable_compile=True
    )
    generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
    # Move trimmed IDs to CPU before decoding
    generated_ids_trimmed_cpu = [ids.cpu() for ids in generated_ids_trimmed]
    output_text = processor.batch_decode(
        generated_ids_trimmed_cpu,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False
    )
    return output_text[0]

# Generate tarot interpretation with ZeroGPU
@spaces.GPU
def generate_interpretation(question, cards, model, tokenizer):
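    """Interpret the reading for `question` from the three identified cards.

    `cards` holds the reason, result, and recommendation card names in that
    order; the call runs on GPU via ZeroGPU and returns the decoded text.
    """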
    # Ensure the model is on GPU
    model.to('cuda')
    prompt = f"""Analyze this tarot reading for the question: {question}

Cards:
1. Reason: {cards[0]}
2. Result: {cards[1]}
3. Recommendation: {cards[2]}
Provide a professional interpretation covering:
- Individual card meanings in their positions
- Combined message and symbolism
- Practical advice
- Potential outcomes"""

    messages = [
        {"role": "system", "content": "You are a Tarot Card Explainer provideing relevant suggestions based on tarot card name"},
        {"role": "user", "content": prompt}
    ]
    
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to("cuda")
    generated_ids = model.generate(**model_inputs, max_new_tokens=512)
    generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
    
    # Decode only the newly generated tokens
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response

def main():
    """
    Main function to set up and launch the Gradio tarot reading application.
    Handles authentication, model loading, and interface creation.
    """
    # Authenticate with Hugging Face
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        raise ValueError("HF_TOKEN not found in environment variables!")
    login(token=hf_token)

    # Load models on CPU
    image_processor, image_model = load_image_model()
    text_model, text_tokenizer = load_text_model()

    # Define the tarot processing function
    def process_tarot(question, reason_img, result_img, recommendation_img):
        """
        Process the user's question and tarot card images to generate a reading.
        Uses loaded models for card identification and interpretation.
        """
        try:
            # Validate image uploads
            if any(img is None for img in [reason_img, result_img, recommendation_img]):
                return "Please upload all three cards!"

            # Generate descriptions for each card using GPU
            cards = []
            for img in [reason_img, result_img, recommendation_img]:
                sample = {
                    "prompt": "Please tell me the name of the tarot card in this image, specify 'reversed' if it is. ",
                    "image": img.convert("RGB")
                }
                card = generate_description(sample, image_model, image_processor)
                cards.append(card)
            output = "### Identifying Card Name...\n"

            # Generate the full interpretation using GPU
            interpretation = generate_interpretation(question, cards, text_model, text_tokenizer)

            card_cat = ['Reason Card', 'Result Card', 'Recommendation Card']

            # Format the output
            output += "### Card Analysis\n"
            for label, card in zip(card_cat, cards):
                output += f"**{label}:** {card}\n\n"
            output += "### Full Interpretation\n"
            output += interpretation
            output += "\n\n**Reading Ends.**"
            return output
        except Exception as e:
            return f"Error in reading: {str(e)}"

    # Set up the Gradio interface
    with gr.Blocks() as demo:
        gr.Markdown("# 🔮 Acrane Intelligence (A.I.)")
        gr.Markdown("### Artifical Intelligence Supported Tarot Reading Application")
        question = gr.Textbox(
            label="Your Question",
            placeholder="Enter your question for the cards...",
            lines=3
        )
        with gr.Row():
            reason_img = gr.Image(label="Reason Card", type="pil")
            result_img = gr.Image(label="Result Card", type="pil")
            recommendation_img = gr.Image(label="Recommendation Card", type="pil")
        submit_btn = gr.Button("Perform Reading")
        output = gr.Markdown()

        # Connect the button to the processing function
        submit_btn.click(
            fn=lambda: "Reading in progress...",  # Show a progress message immediately
            inputs=None,
            outputs=output,
        ).then(
            fn=process_tarot,  # Then run the full tarot reading
            inputs=[question, reason_img, result_img, recommendation_img],  # Pass all inputs
            outputs=output,  # Replace the progress message with the result
        )

    # Launch the application
    demo.launch()

# Entry point of the script
if __name__ == "__main__":
    main()