Update app.py
app.py CHANGED
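This commit tightens device handling in the two `@spaces.GPU` functions: each one now moves its model to CUDA explicitly, the processor output is sent to the GPU in a single chained `.to("cuda")`, generated token IDs are moved back to the CPU before decoding, and `generate_interpretation` now decodes its output and returns the interpretation. `main()` also gains a status line shown while cards are being identified.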
```diff
@@ -43,7 +43,7 @@ def load_text_model():
 # Generate card description with ZeroGPU
 @spaces.GPU
 def generate_description(sample, model, processor):
-    #
+    # Ensure the model is on GPU
     model.to('cuda')
     system_message = 'You are a Tarot Card Identifier providing the card names and whether they are in upright or reversed position.'
     messages = [
@@ -55,8 +55,7 @@ def generate_description(sample, model, processor):
     ]
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs = process_vision_info(messages)
-    inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt")
-    inputs = inputs.to("cuda")
+    inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to("cuda")
     stop_token_ids = [processor.tokenizer.eos_token_id, processor.tokenizer.convert_tokens_to_ids("<end_of_turn>")]
     generated_ids = model.generate(
         **inputs,
@@ -68,8 +67,10 @@ def generate_description(sample, model, processor):
         disable_compile=True
     )
     generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
+    # Move trimmed IDs to CPU before decoding
+    generated_ids_trimmed_cpu = [ids.cpu() for ids in generated_ids_trimmed]
     output_text = processor.batch_decode(
-
+        generated_ids_trimmed_cpu,
         skip_special_tokens=True,
         clean_up_tokenization_spaces=False
     )
@@ -78,6 +79,8 @@ def generate_description(sample, model, processor):
 # Generate tarot interpretation with ZeroGPU
 @spaces.GPU
 def generate_interpretation(question, cards, model, tokenizer):
+    # Ensure the model is on GPU
+    model.to('cuda')
     prompt = f"""Analyze this tarot reading for the question: {question}
 
 Cards:
@@ -89,10 +92,11 @@ Provide a professional interpretation covering:
 - Combined message and symbolism
 - Practical advice
 - Potential outcomes"""
-    # Use GPU for this inference call
     input_ids = tokenizer(prompt, return_tensors="pt").to("cuda")
     outputs = model.generate(**input_ids, max_new_tokens=32)
-
+    # Move output to CPU before decoding
+    interpretation = tokenizer.decode(outputs[0].cpu(), skip_special_tokens=True)
+    return interpretation
 
 def main():
     """
@@ -129,7 +133,7 @@ def main():
         }
         card = generate_description(sample, image_model, image_processor)
         cards.append(card)
-
+        output = "### Identifying Card Name...\n"
 
         # Generate the full interpretation using GPU
         interpretation = generate_interpretation(question, cards, text_model, text_tokenizer)
```
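Taken together, the hunks converge on the standard ZeroGPU shape: claim the GPU only inside the `@spaces.GPU` function, run generation there, and hand back plain CPU data. Below is a minimal sketch of that pattern, assuming a small `transformers` causal LM; `demo_generate` and the `gpt2` checkpoint are illustrative stand-ins for the app's own model and processor, not names from this repo.

```python
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer

# Illustrative stand-ins; the app loads its own model/processor pair.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

@spaces.GPU
def demo_generate(prompt: str) -> str:
    # Under ZeroGPU, CUDA is attached only while a @spaces.GPU function
    # runs, so the model is moved to the device here, not at load time.
    model.to("cuda")
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=32)
    # Decode from a CPU tensor and return a plain string so nothing
    # CUDA-resident outlives the GPU context.
    return tokenizer.decode(outputs[0].cpu(), skip_special_tokens=True)
```

Chaining `.to("cuda")` onto the tokenizer/processor output works because `transformers` returns a `BatchEncoding`, whose `.to()` moves every tensor it holds in one call; that is the collapse the second hunk makes.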
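The trimming step in the third hunk strips the echoed prompt from each generated sequence so only new tokens are decoded, and the new `generated_ids_trimmed_cpu,` line supplies the sequences argument that `batch_decode` requires as its first positional parameter. A toy, self-contained illustration of the slicing (token values made up):

```python
import torch

# generate() returns prompt + completion, so each output row begins
# with its corresponding input row.
input_ids = torch.tensor([[101, 7, 8]])
generated_ids = torch.tensor([[101, 7, 8, 42, 43, 102]])

# Same trim as the diff: drop len(input) prompt tokens, then move the
# remainder to CPU before handing the IDs over for decoding.
trimmed_cpu = [out[len(inp):].cpu() for inp, out in zip(input_ids, generated_ids)]
print(trimmed_cpu)  # one tensor of new tokens per sequence: 42, 43, 102
```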