import os
from huggingface_hub import login
import gradio as gr
from PIL import Image
from transformers import (
    AutoModelForCausalLM,
    AutoModelForImageTextToText,
    AutoProcessor,
    AutoTokenizer,
)
import torch
import spaces
from peft import PeftModel
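
# Assumed runtime dependencies (the version pin is a guess, not from the source):
# gradio, spaces (Hugging Face ZeroGPU), torch, transformers>=4.50 (Gemma 3
# support), peft, huggingface_hub, pillow.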

# Collect the PIL images referenced in a chat-style message list
def process_vision_info(messages: list[dict]) -> list[Image.Image]:
    image_inputs = []
    for msg in messages:
        content = msg.get("content", [])
        if not isinstance(content, list):
            content = [content]
        for element in content:
            # Only elements that actually carry an image payload are usable
            if isinstance(element, dict) and element.get("image") is not None:
                image_inputs.append(element["image"].convert("RGB"))
    return image_inputs
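
# Example (hypothetical PIL image object `img`):
#   messages = [{"role": "user", "content": [{"type": "image", "image": img}]}]
#   process_vision_info(messages)  # -> [img.convert("RGB")]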

# Load the image model and processor on CPU
def load_image_model():
    model_name = "JoannaKOKO/Gemma3-4b_tarot"
    model = AutoModelForImageTextToText.from_pretrained(
        model_name,
        device_map="cpu",
        torch_dtype=torch.bfloat16,
        attn_implementation="eager",
    )
    processor = AutoProcessor.from_pretrained(model_name)
    return processor, model
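
# PeftModel is imported above but unused; if the tarot weights were published
# as a LoRA adapter rather than a merged checkpoint, they could be attached to
# the base model like this (adapter and base repo names are hypothetical):
# def load_image_model_from_adapter():
#     base = AutoModelForImageTextToText.from_pretrained(
#         "google/gemma-3-4b-it", device_map="cpu", torch_dtype=torch.bfloat16
#     )
#     model = PeftModel.from_pretrained(base, "JoannaKOKO/Gemma3-4b_tarot-lora")
#     processor = AutoProcessor.from_pretrained("google/gemma-3-4b-it")
#     return processor, model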

# Load the text model and tokenizer on CPU
def load_text_model():
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B-Instruct")
    model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-3B-Instruct")
    return model, tokenizer

# Generate a card description with ZeroGPU
@spaces.GPU
def generate_description(sample, model, processor):
    # Move the model to the GPU for the duration of this call
    model.to("cuda")
    system_message = "You are a Tarot Card Identifier providing the card names and whether they are in upright or reversed position."
    messages = [
        {"role": "system", "content": [{"type": "text", "text": system_message}]},
        {"role": "user", "content": [
            {"type": "image", "image": sample["image"]},
            {"type": "text", "text": sample["prompt"]},
        ]},
    ]
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs = process_vision_info(messages)
    inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to("cuda")
    # Stop at either the EOS token or Gemma's <end_of_turn> marker
    stop_token_ids = [processor.tokenizer.eos_token_id, processor.tokenizer.convert_tokens_to_ids("<end_of_turn>")]
    generated_ids = model.generate(
        **inputs,
        max_new_tokens=256,
        top_p=1.0,
        do_sample=True,
        temperature=0.8,
        eos_token_id=stop_token_ids,
        disable_compile=True,
    )
    # Keep only the newly generated tokens, dropping the prompt
    generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
    # Move the trimmed IDs to CPU before decoding
    generated_ids_trimmed_cpu = [ids.cpu() for ids in generated_ids_trimmed]
    output_text = processor.batch_decode(
        generated_ids_trimmed_cpu,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return output_text[0]
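
# Minimal usage sketch (file name is hypothetical; requires a CUDA device):
#   processor, model = load_image_model()
#   sample = {"prompt": "Name this tarot card.", "image": Image.open("card.jpg")}
#   print(generate_description(sample, model, processor))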

# Generate a tarot interpretation with ZeroGPU
@spaces.GPU
def generate_interpretation(question, cards, model, tokenizer):
    # Move the model to the GPU for the duration of this call
    model.to("cuda")
    prompt = f"""Analyze this tarot reading for the question: {question}
Cards:
1. Reason: {cards[0]}
2. Result: {cards[1]}
3. Recommendation: {cards[2]}
Provide a professional interpretation covering:
- Individual card meanings in their positions
- Combined message and symbolism
- Practical advice
- Potential outcomes"""
    messages = [
        {"role": "system", "content": "You are a Tarot Card Explainer providing relevant suggestions based on the tarot card names."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to("cuda")
    generated_ids = model.generate(**model_inputs, max_new_tokens=512)
    # Keep only the newly generated tokens, dropping the prompt
    generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response
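
# Example invocation (card strings are illustrative):
#   model, tokenizer = load_text_model()
#   generate_interpretation(
#       "Will my project succeed?",
#       ["The Magician upright", "The Tower reversed", "The Star upright"],
#       model, tokenizer,
#   )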

def main():
    """
    Set up and launch the Gradio tarot reading application.
    Handles authentication, model loading, and interface creation.
    """
    # Authenticate with Hugging Face
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        raise ValueError("HF_TOKEN not found in environment variables!")
    login(token=hf_token)

    # Load models on CPU
    image_processor, image_model = load_image_model()
    text_model, text_tokenizer = load_text_model()

    # Define the tarot processing function
    def process_tarot(question, reason_img, result_img, recommendation_img):
        """
        Process the user's question and tarot card images to generate a reading.
        Uses the loaded models for card identification and interpretation.
        """
        try:
            # Validate image uploads
            if any(img is None for img in [reason_img, result_img, recommendation_img]):
                return "Please upload all three cards!"
            # Identify each card on the GPU
            cards = []
            for img in [reason_img, result_img, recommendation_img]:
                sample = {
                    "prompt": "Please tell me the name of the tarot card in this image, and specify 'reversed' if it is reversed.",
                    "image": img.convert("RGB"),
                }
                cards.append(generate_description(sample, image_model, image_processor))
            # Generate the full interpretation on the GPU
            interpretation = generate_interpretation(question, cards, text_model, text_tokenizer)
            # Format the output
            card_labels = ["Reason Card", "Result Card", "Recommendation Card"]
            output = "### Card Analysis\n"
            for label, card in zip(card_labels, cards):
                output += f"**{label}:** {card}\n\n"
            output += "### Full Interpretation\n"
            output += interpretation
            output += "\n\n**Reading Ends.**"
            return output
        except Exception as e:
            return f"Error in reading: {str(e)}"

    # Set up the Gradio interface
    with gr.Blocks() as demo:
        gr.Markdown("# 🔮 Arcane Intelligence (A.I.)")
        gr.Markdown("### An Artificial-Intelligence-Supported Tarot Reading Application")
        question = gr.Textbox(
            label="Your Question",
            placeholder="Enter your question for the cards...",
            lines=3,
        )
        with gr.Row():
            reason_img = gr.Image(label="Reason Card", type="pil")
            result_img = gr.Image(label="Result Card", type="pil")
            recommendation_img = gr.Image(label="Recommendation Card", type="pil")
        submit_btn = gr.Button("Perform Reading")
        output = gr.Markdown()
        # Show a progress message first, then replace it with the reading
        submit_btn.click(
            fn=lambda: "Reading in progress...",
            inputs=None,
            outputs=output,
        ).then(
            fn=process_tarot,
            inputs=[question, reason_img, result_img, recommendation_img],
            outputs=output,
        )

    # Launch the application
    demo.launch()

# Entry point of the script
if __name__ == "__main__":
    main()