import os
from huggingface_hub import login
import gradio as gr
from PIL import Image
from transformers import (
    AutoProcessor,
    AutoModelForImageTextToText,
    AutoModelForCausalLM,
    AutoTokenizer,
)
import torch
import spaces
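# Note (assumption): the Space's requirements.txt is assumed to pin gradio,
# torch, transformers, spaces, and peft; peft must be installed at load time
# if JoannaKOKO/Gemma3-4b_tarot is a LoRA adapter checkpoint, even though it
# is never imported directly here.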
# Collect the PIL images referenced in a chat-style messages list
def process_vision_info(messages: list[dict]) -> list[Image.Image]:
    image_inputs = []
    for msg in messages:
        content = msg.get("content", [])
        if not isinstance(content, list):
            content = [content]
        for element in content:
            if isinstance(element, dict) and ("image" in element or element.get("type") == "image"):
                image = element["image"] if "image" in element else element
                image_inputs.append(image.convert("RGB"))
    return image_inputs
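# Example (illustrative): a messages list like the one built in
# generate_description() below yields its PIL images, e.g.
#   process_vision_info([{"role": "user",
#                         "content": [{"type": "image", "image": pil_img}]}])
# returns [pil_img.convert("RGB")].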
# Load the fine-tuned Gemma vision model and its processor on CPU;
# it is moved to GPU only inside the @spaces.GPU-decorated functions.
def load_image_model():
    model_name = "JoannaKOKO/Gemma3-4b_tarot"
    model = AutoModelForImageTextToText.from_pretrained(
        model_name,
        device_map="cpu",
        torch_dtype=torch.bfloat16,
        attn_implementation="eager",
    )
    processor = AutoProcessor.from_pretrained(model_name)
    return processor, model
# Load the Qwen text model and tokenizer on CPU
def load_text_model():
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B-Instruct")
    model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-3B-Instruct")
    return model, tokenizer
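# Note (suggestion): the text model is loaded in full precision here; passing
# torch_dtype=torch.bfloat16, as done for the vision model above, would
# roughly halve its memory footprint.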
# Generate a card description with the vision model (runs on ZeroGPU)
@spaces.GPU
def generate_description(sample, model, processor):
    # Move the model to GPU for the duration of the call
    model.to("cuda")
    system_message = (
        "You are a Tarot Card Identifier providing the card names and "
        "whether they are in upright or reversed position."
    )
    messages = [
        {"role": "system", "content": [{"type": "text", "text": system_message}]},
        {"role": "user", "content": [
            {"type": "image", "image": sample["image"]},
            {"type": "text", "text": sample["prompt"]},
        ]},
    ]
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs = process_vision_info(messages)
    inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to("cuda")
    # Stop on either the EOS token or Gemma's end-of-turn marker
    stop_token_ids = [
        processor.tokenizer.eos_token_id,
        processor.tokenizer.convert_tokens_to_ids("<end_of_turn>"),
    ]
    generated_ids = model.generate(
        **inputs,
        max_new_tokens=256,
        top_p=1.0,
        do_sample=True,
        temperature=0.8,
        eos_token_id=stop_token_ids,
        disable_compile=True,
    )
    # Keep only the newly generated tokens, not the prompt
    generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
    # Move trimmed IDs to CPU before decoding
    generated_ids_trimmed_cpu = [ids.cpu() for ids in generated_ids_trimmed]
    output_text = processor.batch_decode(
        generated_ids_trimmed_cpu,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return output_text[0]
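# Example (illustrative), assuming a PIL image `img` and the loaded model pair:
#   card = generate_description(
#       {"prompt": "Name the tarot card in this image.", "image": img},
#       image_model, image_processor)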
# Generate the tarot interpretation with the text model (runs on ZeroGPU)
@spaces.GPU
def generate_interpretation(question, cards, model, tokenizer):
    # Move the model to GPU for the duration of the call
    model.to("cuda")
    prompt = f"""Analyze this tarot reading for the question: {question}

Cards:
1. Reason: {cards[0]}
2. Result: {cards[1]}
3. Recommendation: {cards[2]}

Provide a professional interpretation covering:
- Individual card meanings in their positions
- Combined message and symbolism
- Practical advice
- Potential outcomes"""
    messages = [
        {"role": "system", "content": "You are a Tarot Card Explainer providing relevant suggestions based on the tarot card names."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to("cuda")
    generated_ids = model.generate(**model_inputs, max_new_tokens=512)
    # Keep only the newly generated tokens, then decode
    generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response
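# Example (illustrative) of the interpretation call with three identified cards:
#   generate_interpretation("Should I change jobs?",
#                           ["The Fool", "The Sun", "Strength (reversed)"],
#                           text_model, text_tokenizer)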
def main():
    """
    Set up and launch the Gradio tarot reading application.
    Handles authentication, model loading, and interface creation.
    """
    # Authenticate with Hugging Face
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        raise ValueError("HF_TOKEN not found in environment variables!")
    login(token=hf_token)

    # Load models on CPU; they are moved to GPU inside the @spaces.GPU functions
    image_processor, image_model = load_image_model()
    text_model, text_tokenizer = load_text_model()

    def process_tarot(question, reason_img, result_img, recommendation_img):
        """
        Process the user's question and tarot card images to generate a reading.
        Uses the loaded models for card identification and interpretation.
        """
        try:
            # Validate image uploads
            if any(img is None for img in [reason_img, result_img, recommendation_img]):
                return "Please upload all three cards!"
            # Generate a description for each card using the vision model
            cards = []
            for img in [reason_img, result_img, recommendation_img]:
                sample = {
                    "prompt": "Please tell me the name of the tarot card in this image, and specify 'reversed' if it is.",
                    "image": img.convert("RGB"),
                }
                cards.append(generate_description(sample, image_model, image_processor))
            # Generate the full interpretation using the text model
            interpretation = generate_interpretation(question, cards, text_model, text_tokenizer)
            # Format the output
            card_cat = ["Reason Card", "Result Card", "Recommendation Card"]
            output = "### Card Analysis\n"
            for label, card in zip(card_cat, cards):
                output += f"**{label}:** {card}\n\n"
            output += "### Full Interpretation\n"
            output += interpretation
            output += "\n\n**Reading Ends.**"
            return output
        except Exception as e:
            return f"Error in reading: {e}"

    # Set up the Gradio interface
    with gr.Blocks() as demo:
        gr.Markdown("# 🔮 Arcane Intelligence (A.I.)")
        gr.Markdown("### Artificial Intelligence-Supported Tarot Reading Application")
        question = gr.Textbox(
            label="Your Question",
            placeholder="Enter your question for the cards...",
            lines=3,
        )
        with gr.Row():
            reason_img = gr.Image(label="Reason Card", type="pil")
            result_img = gr.Image(label="Result Card", type="pil")
            recommendation_img = gr.Image(label="Recommendation Card", type="pil")
        submit_btn = gr.Button("Perform Reading")
        output = gr.Markdown()

        # Show a progress message first, then run the reading and update
        # the same output component with the result
        submit_btn.click(
            fn=lambda: "Reading in progress...",
            inputs=None,
            outputs=output,
        ).then(
            fn=process_tarot,
            inputs=[question, reason_img, result_img, recommendation_img],
            outputs=output,
        )

    # Launch the application
    demo.launch()


# Entry point of the script
if __name__ == "__main__":
    main()