Upload 2 files
- app.py +174 -0
- requirements.txt +7 -0
app.py
ADDED
@@ -0,0 +1,174 @@
import os
from huggingface_hub import login
import gradio as gr
from PIL import Image
from transformers import AutoProcessor, AutoModelForImageTextToText, pipeline
import torch
import spaces

# Collect the PIL images referenced in a chat-style message list
def process_vision_info(messages: list[dict]) -> list[Image.Image]:
    image_inputs = []
    for msg in messages:
        content = msg.get("content", [])
        if not isinstance(content, list):
            content = [content]
        for element in content:
            if isinstance(element, dict) and ("image" in element or element.get("type") == "image"):
                image = element.get("image")
                if image is not None:
                    image_inputs.append(image.convert("RGB"))
    return image_inputs

# Load image model and processor on CPU
def load_image_model():
    model_name = "JoannaKOKO/Gemma3-4b_tarot"
    model = AutoModelForImageTextToText.from_pretrained(
        model_name,
        device_map="cpu",
        torch_dtype=torch.bfloat16,
        attn_implementation="eager",
    )
    processor = AutoProcessor.from_pretrained(model_name)
    return processor, model

# Load text model on CPU
def load_text_model():
    return pipeline(
        "text-generation",
        model="tarotscientist/llama-2-7b-tarotreader",
        device=-1  # Force CPU
    )

# Generate card description with ZeroGPU
@spaces.GPU
def generate_description(sample, model, processor):
    # Move the image model to GPU for this call
    model.to('cuda')
    system_message = 'You are a Tarot Card Identifier providing the card names and whether they are in upright or reversed position.'
    messages = [
        {"role": "system", "content": [{"type": "text", "text": system_message}]},
        {"role": "user", "content": [
            {"type": "image", "image": sample["image"]},
            {"type": "text", "text": sample["prompt"]},
        ]},
    ]
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs = process_vision_info(messages)
    inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt")
    inputs = inputs.to("cuda")
    stop_token_ids = [processor.tokenizer.eos_token_id, processor.tokenizer.convert_tokens_to_ids("<end_of_turn>")]
    generated_ids = model.generate(
        **inputs,
        max_new_tokens=256,
        top_p=1.0,
        do_sample=True,
        temperature=0.8,
        eos_token_id=stop_token_ids,
        disable_compile=True
    )
    # Strip the prompt tokens so only the generated answer is decoded
    generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
    output_text = processor.batch_decode(
        generated_ids_trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False
    )
    return output_text[0]

# Generate tarot interpretation with the text model
@spaces.GPU
def generate_interpretation(question, cards, model):
    prompt = f"""Analyze this tarot reading for the question: {question}

Cards:
1. Reason: {cards[0]}
2. Result: {cards[1]}
3. Recommendation: {cards[2]}
Provide a professional interpretation covering:
- Individual card meanings in their positions
- Combined message and symbolism
- Practical advice
- Potential outcomes"""
    # Run the text-generation pipeline (loaded on CPU; @spaces.GPU only reserves the ZeroGPU slot for this call)
    response = model(prompt, max_length=1000, temperature=0.8, top_p=0.95)[0]['generated_text']
    return response

def main():
    """
    Main function to set up and launch the Gradio tarot reading application.
    Handles authentication, model loading, and interface creation.
    """
    # Authenticate with Hugging Face
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        raise ValueError("HF_TOKEN not found in environment variables!")
    login(token=hf_token)

    # Load models on CPU
    image_processor, image_model = load_image_model()
    text_model = load_text_model()

    # Define the tarot processing function
    def process_tarot(question, reason_img, result_img, recommendation_img):
        """
        Process the user's question and tarot card images to generate a reading.
        Uses loaded models for card identification and interpretation.
        """
        try:
            # Validate image uploads
            if any(img is None for img in [reason_img, result_img, recommendation_img]):
                return "Please upload all three cards!"

            # Generate a description for each card with the image model
            cards = []
            for img in [reason_img, result_img, recommendation_img]:
                sample = {
                    "prompt": "Please tell me the name of the tarot card in this image, and specify 'reversed' if it is.",
                    "image": img.convert("RGB")
                }
                card = generate_description(sample, image_model, image_processor)
                cards.append(card)
            output = "### Identifying Card Names...\n"

            # Generate the full interpretation with the text model
            interpretation = generate_interpretation(question, cards, text_model)

            # Format the output
            output += "### Card Analysis\n"
            for i, card in enumerate(cards, 1):
                output += f"**Position {i}:** {card}\n\n"
            output += "### Full Interpretation\n"
            output += interpretation
            output += "\n\n**Reading complete! Reflect on these insights.**"
            return output
        except Exception as e:
            return f"Error in reading: {str(e)}"

    # Set up the Gradio interface
    with gr.Blocks() as demo:
        gr.Markdown("# 🔮 Advanced Tarot Reader")
        gr.Markdown("### Professional-grade AI Tarot Analysis")
        question = gr.Textbox(
            label="Your Question",
            placeholder="Enter your question for the cards...",
            lines=3
        )
        with gr.Row():
            reason_img = gr.Image(label="Reason Card", type="pil")
            result_img = gr.Image(label="Result Card", type="pil")
            recommendation_img = gr.Image(label="Recommendation Card", type="pil")
        submit_btn = gr.Button("Perform Reading")
        output = gr.Markdown()

        # Connect the button to the processing function
        submit_btn.click(
            fn=process_tarot,
            inputs=[question, reason_img, result_img, recommendation_img],
            outputs=output
        )

    # Launch the application (no share=True for Hugging Face Spaces)
    demo.launch()

# Entry point of the script
if __name__ == "__main__":
    main()
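The file above wires everything through Gradio, but the card-identification step can be exercised on its own. The following is a minimal, hypothetical smoke test that is not part of this commit: it assumes app.py is importable, the `spaces` package is installed (outside a ZeroGPU Space its GPU decorator should simply run the wrapped function), a CUDA device is available for the `model.to('cuda')` call, `HF_TOKEN` is set if the checkpoints are gated, and `card.jpg` is a local photo of a single tarot card.

import os
from huggingface_hub import login
from PIL import Image
from app import load_image_model, generate_description

# Log in only if the checkpoints require authentication (hypothetical local run)
login(token=os.environ["HF_TOKEN"])

processor, model = load_image_model()
sample = {
    "prompt": "Please tell me the name of the tarot card in this image, and specify 'reversed' if it is.",
    "image": Image.open("card.jpg").convert("RGB"),
}
print(generate_description(sample, model, processor))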
requirements.txt
ADDED
@@ -0,0 +1,7 @@
torch
git+https://github.com/huggingface/transformers.git
pillow
gradio
accelerate>=0.26.0
peft
huggingface_hub
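One note on dependencies: app.py imports `spaces` for the `@spaces.GPU` decorator, which is not listed above. On Hugging Face ZeroGPU Spaces the package is typically provided by the runtime, but a local run would likely also need `pip install spaces`, plus an `HF_TOKEN` environment variable (or Space secret) for the `login()` call in `main()`.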