JoannaKOKO committed
Commit 0dcb9ef (verified) · Parent(s): 148b365

Update app.py

Files changed (1): app.py (+11 -7)
app.py CHANGED
@@ -43,7 +43,7 @@ def load_text_model():
 # Generate card description with ZeroGPU
 @spaces.GPU
 def generate_description(sample, model, processor):
-    # Move the image model to GPU
+    # Ensure the model is on GPU
     model.to('cuda')
     system_message = 'You are a Tarot Card Identifier providing the card names and whether they are in upright or reversed position.'
     messages = [
@@ -55,8 +55,7 @@ def generate_description(sample, model, processor):
     ]
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs = process_vision_info(messages)
-    inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt")
-    inputs = inputs.to("cuda")
+    inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to("cuda")
     stop_token_ids = [processor.tokenizer.eos_token_id, processor.tokenizer.convert_tokens_to_ids("<end_of_turn>")]
     generated_ids = model.generate(
         **inputs,
@@ -68,8 +67,10 @@ def generate_description(sample, model, processor):
         disable_compile=True
     )
     generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
+    # Move trimmed IDs to CPU before decoding
+    generated_ids_trimmed_cpu = [ids.cpu() for ids in generated_ids_trimmed]
     output_text = processor.batch_decode(
-        generated_ids_trimmed,
+        generated_ids_trimmed_cpu,
         skip_special_tokens=True,
         clean_up_tokenization_spaces=False
     )
@@ -78,6 +79,8 @@ def generate_description(sample, model, processor):
 # Generate tarot interpretation with ZeroGPU
 @spaces.GPU
 def generate_interpretation(question, cards, model, tokenizer):
+    # Ensure the model is on GPU
+    model.to('cuda')
     prompt = f"""Analyze this tarot reading for the question: {question}

 Cards:
@@ -89,10 +92,11 @@ Provide a professional interpretation covering:
 - Combined message and symbolism
 - Practical advice
 - Potential outcomes"""
-    # Use GPU for this inference call
     input_ids = tokenizer(prompt, return_tensors="pt").to("cuda")
     outputs = model.generate(**input_ids, max_new_tokens=32)
-    return (tokenizer.decode(outputs[0]))
+    # Move output to CPU before decoding
+    interpretation = tokenizer.decode(outputs[0].cpu(), skip_special_tokens=True)
+    return interpretation

 def main():
     """
@@ -129,7 +133,7 @@ def main():
         }
         card = generate_description(sample, image_model, image_processor)
         cards.append(card)
-        output = "### Identifing Card Name...\n"
+        output = "### Identifying Card Name...\n"

     # Generate the full interpretation using GPU
     interpretation = generate_interpretation(question, cards, text_model, text_tokenizer)
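
For context, the pattern this commit converges on is sketched below: move the model to 'cuda' inside the @spaces.GPU-decorated function (on ZeroGPU, a GPU is only attached for the duration of that call), build the inputs on the same device, and bring the generated IDs back to the CPU before decoding. This is a minimal sketch under stated assumptions, not the Space's exact code: the checkpoint name is a placeholder and the prompt is abbreviated.

import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder checkpoint for illustration only; app.py loads its own
# models in load_text_model() / load_image_model().
MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float16)

@spaces.GPU  # ZeroGPU attaches a GPU only while this function runs
def generate_interpretation(question, cards):
    # Move the model inside the decorated call, as the commit does:
    # 'cuda' is only guaranteed to exist while the decorator holds a GPU.
    model.to("cuda")
    prompt = f"Analyze this tarot reading for the question: {question}\n\nCards:\n" + "\n".join(cards)
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=32)
    # Decode from a CPU tensor, mirroring the commit's .cpu() calls.
    return tokenizer.decode(outputs[0].cpu(), skip_special_tokens=True)

Strictly speaking, decode and batch_decode also accept CUDA tensors in current transformers versions (token IDs are converted to Python lists internally), so the explicit .cpu() is defensive rather than required; the device moves that actually matter are model.to('cuda') and moving the inputs to 'cuda' before generate.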