import os
from huggingface_hub import login
import gradio as gr
from PIL import Image
from transformers import (
    AutoProcessor,
    AutoModelForImageTextToText,
    AutoModelForCausalLM,
    AutoTokenizer,
)
import torch
import spaces
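# Note (assumption): the Space's requirements.txt is assumed to pin gradio,
# torch, transformers, spaces, and peft; peft must be installed at load time
# if JoannaKOKO/Gemma3-4b_tarot is a LoRA adapter checkpoint, even though it
# is never imported directly here.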
# Collect the PIL images referenced in a chat-style messages list
def process_vision_info(messages: list[dict]) -> list[Image.Image]:
    image_inputs = []
    for msg in messages:
        content = msg.get("content", [])
        if not isinstance(content, list):
            content = [content]
        for element in content:
            if isinstance(element, dict) and ("image" in element or element.get("type") == "image"):
                image = element["image"] if "image" in element else element
                image_inputs.append(image.convert("RGB"))
    return image_inputs
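# Example (illustrative): a messages list like the one built in
# generate_description() below yields its PIL images, e.g.
#   process_vision_info([{"role": "user",
#                         "content": [{"type": "image", "image": pil_img}]}])
# returns [pil_img.convert("RGB")].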
# Load the fine-tuned Gemma vision model and its processor on CPU;
# it is moved to GPU only inside the @spaces.GPU-decorated functions.
def load_image_model():
    model_name = "JoannaKOKO/Gemma3-4b_tarot"
    model = AutoModelForImageTextToText.from_pretrained(
        model_name,
        device_map="cpu",
        torch_dtype=torch.bfloat16,
        attn_implementation="eager",
    )
    processor = AutoProcessor.from_pretrained(model_name)
    return processor, model
# Load the Qwen text model and tokenizer on CPU
def load_text_model():
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B-Instruct")
    model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-3B-Instruct")
    return model, tokenizer
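# Note (suggestion): the text model is loaded in full precision here; passing
# torch_dtype=torch.bfloat16, as done for the vision model above, would
# roughly halve its memory footprint.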
# Generate a card description with the vision model (runs on ZeroGPU)
@spaces.GPU
def generate_description(sample, model, processor):
    # Move the model to GPU for the duration of the call
    model.to("cuda")
    system_message = (
        "You are a Tarot Card Identifier providing the card names and "
        "whether they are in upright or reversed position."
    )
    messages = [
        {"role": "system", "content": [{"type": "text", "text": system_message}]},
        {"role": "user", "content": [
            {"type": "image", "image": sample["image"]},
            {"type": "text", "text": sample["prompt"]},
        ]},
    ]
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs = process_vision_info(messages)
    inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to("cuda")
    # Stop on either the EOS token or Gemma's end-of-turn marker
    stop_token_ids = [
        processor.tokenizer.eos_token_id,
        processor.tokenizer.convert_tokens_to_ids("<end_of_turn>"),
    ]
    generated_ids = model.generate(
        **inputs,
        max_new_tokens=256,
        top_p=1.0,
        do_sample=True,
        temperature=0.8,
        eos_token_id=stop_token_ids,
        disable_compile=True,
    )
    # Keep only the newly generated tokens, not the prompt
    generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
    # Move trimmed IDs to CPU before decoding
    generated_ids_trimmed_cpu = [ids.cpu() for ids in generated_ids_trimmed]
    output_text = processor.batch_decode(
        generated_ids_trimmed_cpu,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return output_text[0]
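# Example (illustrative), assuming a PIL image `img` and the loaded model pair:
#   card = generate_description(
#       {"prompt": "Name the tarot card in this image.", "image": img},
#       image_model, image_processor)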
# Generate the tarot interpretation with the text model (runs on ZeroGPU)
@spaces.GPU
def generate_interpretation(question, cards, model, tokenizer):
    # Move the model to GPU for the duration of the call
    model.to("cuda")
    prompt = f"""Analyze this tarot reading for the question: {question}

Cards:
1. Reason: {cards[0]}
2. Result: {cards[1]}
3. Recommendation: {cards[2]}

Provide a professional interpretation covering:
- Individual card meanings in their positions
- Combined message and symbolism
- Practical advice
- Potential outcomes"""
    messages = [
        {"role": "system", "content": "You are a Tarot Card Explainer providing relevant suggestions based on the tarot card names."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to("cuda")
    generated_ids = model.generate(**model_inputs, max_new_tokens=512)
    # Keep only the newly generated tokens, then decode
    generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response
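# Example (illustrative) of the interpretation call with three identified cards:
#   generate_interpretation("Should I change jobs?",
#                           ["The Fool", "The Sun", "Strength (reversed)"],
#                           text_model, text_tokenizer)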
def main():
    """
    Set up and launch the Gradio tarot reading application.
    Handles authentication, model loading, and interface creation.
    """
    # Authenticate with Hugging Face
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        raise ValueError("HF_TOKEN not found in environment variables!")
    login(token=hf_token)

    # Load models on CPU; they are moved to GPU inside the @spaces.GPU functions
    image_processor, image_model = load_image_model()
    text_model, text_tokenizer = load_text_model()

    def process_tarot(question, reason_img, result_img, recommendation_img):
        """
        Process the user's question and tarot card images to generate a reading.
        Uses the loaded models for card identification and interpretation.
        """
        try:
            # Validate image uploads
            if any(img is None for img in [reason_img, result_img, recommendation_img]):
                return "Please upload all three cards!"
            # Generate a description for each card using the vision model
            cards = []
            for img in [reason_img, result_img, recommendation_img]:
                sample = {
                    "prompt": "Please tell me the name of the tarot card in this image, and specify 'reversed' if it is.",
                    "image": img.convert("RGB"),
                }
                cards.append(generate_description(sample, image_model, image_processor))
            # Generate the full interpretation using the text model
            interpretation = generate_interpretation(question, cards, text_model, text_tokenizer)
            # Format the output
            card_cat = ["Reason Card", "Result Card", "Recommendation Card"]
            output = "### Card Analysis\n"
            for label, card in zip(card_cat, cards):
                output += f"**{label}:** {card}\n\n"
            output += "### Full Interpretation\n"
            output += interpretation
            output += "\n\n**Reading Ends.**"
            return output
        except Exception as e:
            return f"Error in reading: {e}"

    # Set up the Gradio interface
    with gr.Blocks() as demo:
        gr.Markdown("# 🔮 Arcane Intelligence (A.I.)")
        gr.Markdown("### Artificial Intelligence-Supported Tarot Reading Application")
        question = gr.Textbox(
            label="Your Question",
            placeholder="Enter your question for the cards...",
            lines=3,
        )
        with gr.Row():
            reason_img = gr.Image(label="Reason Card", type="pil")
            result_img = gr.Image(label="Result Card", type="pil")
            recommendation_img = gr.Image(label="Recommendation Card", type="pil")
        submit_btn = gr.Button("Perform Reading")
        output = gr.Markdown()

        # Show a progress message first, then run the reading and update
        # the same output component with the result
        submit_btn.click(
            fn=lambda: "Reading in progress...",
            inputs=None,
            outputs=output,
        ).then(
            fn=process_tarot,
            inputs=[question, reason_img, result_img, recommendation_img],
            outputs=output,
        )

    # Launch the application
    demo.launch()


# Entry point of the script
if __name__ == "__main__":
    main()