Update app.py
app.py CHANGED
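This commit tightens device handling in the two `@spaces.GPU` functions: each one now moves its model to CUDA explicitly, the processor output is sent to the GPU in a single chained `.to("cuda")`, generated token IDs are moved back to the CPU before decoding, and `generate_interpretation` now decodes its output and returns the interpretation. `main()` also gains a status line shown while cards are being identified.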
```diff
@@ -43,7 +43,7 @@ def load_text_model():
 # Generate card description with ZeroGPU
 @spaces.GPU
 def generate_description(sample, model, processor):
-    #
+    # Ensure the model is on GPU
     model.to('cuda')
     system_message = 'You are a Tarot Card Identifier providing the card names and whether they are in upright or reversed position.'
     messages = [
@@ -55,8 +55,7 @@ def generate_description(sample, model, processor):
     ]
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs = process_vision_info(messages)
-    inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt")
-    inputs = inputs.to("cuda")
+    inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to("cuda")
     stop_token_ids = [processor.tokenizer.eos_token_id, processor.tokenizer.convert_tokens_to_ids("<end_of_turn>")]
     generated_ids = model.generate(
         **inputs,
@@ -68,8 +67,10 @@ def generate_description(sample, model, processor):
         disable_compile=True
     )
     generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
+    # Move trimmed IDs to CPU before decoding
+    generated_ids_trimmed_cpu = [ids.cpu() for ids in generated_ids_trimmed]
     output_text = processor.batch_decode(
-
+        generated_ids_trimmed_cpu,
         skip_special_tokens=True,
         clean_up_tokenization_spaces=False
     )
@@ -78,6 +79,8 @@ def generate_description(sample, model, processor):
 # Generate tarot interpretation with ZeroGPU
 @spaces.GPU
 def generate_interpretation(question, cards, model, tokenizer):
+    # Ensure the model is on GPU
+    model.to('cuda')
     prompt = f"""Analyze this tarot reading for the question: {question}
 
 Cards:
@@ -89,10 +92,11 @@ Provide a professional interpretation covering:
 - Combined message and symbolism
 - Practical advice
 - Potential outcomes"""
-    # Use GPU for this inference call
     input_ids = tokenizer(prompt, return_tensors="pt").to("cuda")
     outputs = model.generate(**input_ids, max_new_tokens=32)
-
+    # Move output to CPU before decoding
+    interpretation = tokenizer.decode(outputs[0].cpu(), skip_special_tokens=True)
+    return interpretation
 
 def main():
     """
@@ -129,7 +133,7 @@ def main():
         }
         card = generate_description(sample, image_model, image_processor)
         cards.append(card)
-
+        output = "### Identifying Card Name...\n"
 
         # Generate the full interpretation using GPU
         interpretation = generate_interpretation(question, cards, text_model, text_tokenizer)
```
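Taken together, the hunks converge on the standard ZeroGPU shape: claim the GPU only inside the `@spaces.GPU` function, run generation there, and hand back plain CPU data. Below is a minimal sketch of that pattern, assuming a small `transformers` causal LM; `demo_generate` and the `gpt2` checkpoint are illustrative stand-ins for the app's own model and processor, not names from this repo.

```python
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer

# Illustrative stand-ins; the app loads its own model/processor pair.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

@spaces.GPU
def demo_generate(prompt: str) -> str:
    # Under ZeroGPU, CUDA is attached only while a @spaces.GPU function
    # runs, so the model is moved to the device here, not at load time.
    model.to("cuda")
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=32)
    # Decode from a CPU tensor and return a plain string so nothing
    # CUDA-resident outlives the GPU context.
    return tokenizer.decode(outputs[0].cpu(), skip_special_tokens=True)
```

Chaining `.to("cuda")` onto the tokenizer/processor output works because `transformers` returns a `BatchEncoding`, whose `.to()` moves every tensor it holds in one call; that is the collapse the second hunk makes.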
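The trimming step in the third hunk strips the echoed prompt from each generated sequence so only new tokens are decoded, and the new `generated_ids_trimmed_cpu,` line supplies the sequences argument that `batch_decode` requires as its first positional parameter. A toy, self-contained illustration of the slicing (token values made up):

```python
import torch

# generate() returns prompt + completion, so each output row begins
# with its corresponding input row.
input_ids = torch.tensor([[101, 7, 8]])
generated_ids = torch.tensor([[101, 7, 8, 42, 43, 102]])

# Same trim as the diff: drop len(input) prompt tokens, then move the
# remainder to CPU before handing the IDs over for decoding.
trimmed_cpu = [out[len(inp):].cpu() for inp, out in zip(input_ids, generated_ids)]
print(trimmed_cpu)  # one tensor of new tokens per sequence: 42, 43, 102
```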