hasanbasbunar committed
Commit e2f7b5c · verified · 1 parent: 620aef1

Update app.py

Files changed (1): app.py (+56 -34)
app.py CHANGED
@@ -45,16 +45,18 @@ def safe_process_image(part):
     return None
 
 def process_response(response):
-    """Separates the final result from the thought process (Thinking Mode)."""
+    """Separates the final result, the thought process (Thinking Mode), and the sources."""
     final_imgs, final_txt = [], ""
     thought_imgs, thought_txt = [], ""
+    sources_html = None  # <--- ADDED: container for the sources
 
     if not response or not response.parts:
-        return final_imgs, final_txt, thought_imgs, thought_txt
+        return final_imgs, final_txt, thought_imgs, thought_txt, sources_html
 
     print(f"\n--- RECEIVED ({len(response.parts)} parts) ---")
+
+    # 1. Parse the content (images, text & thoughts)
     for i, part in enumerate(response.parts):
-        # The docs mention the 'thought' attribute for the Gemini 3 Pro Thinking Mode
         is_thought = getattr(part, 'thought', False)
 
         if is_thought:
@@ -68,7 +70,14 @@ def process_response(response):
             img = safe_process_image(part)
             if img: final_imgs.append(img)
 
-    return final_imgs, final_txt, thought_imgs, thought_txt
+    # 2. Parse the grounding (sources) - ADDED
+    # The metadata lives on the 'candidate', not on the 'parts'
+    if response.candidates and response.candidates[0].grounding_metadata:
+        gm = response.candidates[0].grounding_metadata
+        if gm.search_entry_point and gm.search_entry_point.rendered_content:
+            sources_html = gm.search_entry_point.rendered_content
+
+    return final_imgs, final_txt, thought_imgs, thought_txt, sources_html
 
 # --- CLEANUP WORKER ---
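The grounding lookup above walks response.candidates[0].grounding_metadata.search_entry_point.rendered_content. The same walk can also be written as a standalone, defensive helper for the case where any level of that chain is missing; a minimal sketch (the helper name is illustrative and not part of app.py):

def extract_grounding_html(response):
    """Return the rendered Google Search entry point for a response, or None."""
    candidates = getattr(response, "candidates", None) or []
    if not candidates:
        return None
    gm = getattr(candidates[0], "grounding_metadata", None)
    entry = getattr(gm, "search_entry_point", None) if gm else None
    return getattr(entry, "rendered_content", None) if entry else None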
 
@@ -110,6 +119,8 @@ def generate_studio(prompt, model_ui, ratio, resolution, grounding, user_api_key
 
     if "gemini-3" in model_name:
         img_conf["image_size"] = resolution
+        # <--- ADDED: enable Thinking Mode
+        gen_conf["thinking_config"] = types.ThinkingConfig(include_thoughts=True)
     if grounding:
         gen_conf["tools"] = [{"google_search": {}}]
 
@@ -122,6 +133,7 @@ def generate_studio(prompt, model_ui, ratio, resolution, grounding, user_api_key
             contents=[prompt],
             config=types.GenerateContentConfig(**gen_conf)
         )
+        # Now returns 5 elements (including sources)
         return process_response(response)
     except Exception as e:
         raise gr.Error(f"API Error: {str(e)}")
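With thinking_config in place, the request generate_studio builds for a Gemini 3 model amounts roughly to the following. This is a condensed sketch rather than the literal app.py code: the client construction, model id, prompt, aspect ratio and resolution values are placeholders, and response_modalities is assumed to be set the same way as in the chat path.

from google import genai
from google.genai import types

client = genai.Client(api_key="YOUR_API_KEY")  # placeholder

config = types.GenerateContentConfig(
    response_modalities=["TEXT", "IMAGE"],                        # assumed, as in the chat path
    image_config=types.ImageConfig(aspect_ratio="16:9",           # placeholder values;
                                   image_size="2K"),              # image_size only for gemini-3 models
    thinking_config=types.ThinkingConfig(include_thoughts=True),  # new in this commit
    tools=[{"google_search": {}}],                                # only when grounding is checked
)

response = client.models.generate_content(
    model="gemini-3-pro-image-preview",   # placeholder model id
    contents=["A watercolor lighthouse at dawn"],
    config=config,
)

process_response(response) then yields the five values (final images, final text, thought images, thought text, sources HTML) that the Tab 1 outputs consume.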
@@ -136,7 +148,6 @@ def generate_composition(prompt, files, model_ui, ratio, resolution, user_api_ke
     contents = [prompt]
     for p in files:
         try:
-            # The Python docs show that PIL Image objects can be passed directly
             contents.append(Image.open(p))
         except: pass
 
@@ -145,6 +156,8 @@ def generate_composition(prompt, files, model_ui, ratio, resolution, user_api_ke
 
     if "gemini-3" in model_name:
         img_conf["image_size"] = resolution
+        # <--- ADDED: enable Thinking Mode here as well, to be safe
+        gen_conf["thinking_config"] = types.ThinkingConfig(include_thoughts=True)
 
     gen_conf["image_config"] = types.ImageConfig(**img_conf)
 
@@ -155,14 +168,19 @@ def generate_composition(prompt, files, model_ui, ratio, resolution, user_api_ke
             contents=contents,
             config=types.GenerateContentConfig(**gen_conf)
         )
-        f_imgs, f_txt, t_imgs, t_txt = process_response(response)
+        # Handle the 5 return values
+        f_imgs, f_txt, t_imgs, t_txt, sources = process_response(response)
+
         full_text = f_txt
-        if t_txt: full_text += f"\n\n--- 🧠 MODEL REASONING ---\n{t_txt}"
+        # Append the sources and thoughts to the main text for this tab
+        if sources: full_text += f"\n\n{sources}"
+        if t_txt: full_text += f"\n\n{t_txt}"
+
         return f_imgs, full_text
     except Exception as e:
         raise gr.Error(f"Error: {str(e)}")
 
-# --- CHAT LOGIC (STRICT DOC COMPLIANCE) ---
+# --- CHAT LOGIC ---
 
 def chat_respond(message, history, chat_history_data, img_input, model_ui, grounding, user_api_key):
     """'Stateless' chat handling, conforming to the Google GenAI types"""
@@ -173,30 +191,30 @@ def chat_respond(message, history, chat_history_data, img_input, model_ui, groun
     model_name = MODELS[model_ui]
 
     tools = None
-    if grounding and "gemini-3" in model_name:
-        tools = [{"google_search": {}}]
+    thinking_conf = None  # <--- ADDED variable
+
+    if "gemini-3" in model_name:
+        # <--- ADDED: Thinking config
+        thinking_conf = types.ThinkingConfig(include_thoughts=True)
+        if grounding:
+            tools = [{"google_search": {}}]
 
-    # 1. Restore the history (respects the Thought Signatures)
-    # We pass the raw list of previously stored 'Content' objects
+    # 1. Restore the history
     chat = cli.chats.create(
         model=model_name,
         config=types.GenerateContentConfig(
             response_modalities=['TEXT', 'IMAGE'],
-            tools=tools
+            tools=tools,
+            thinking_config=thinking_conf  # <--- ADDED
         ),
         history=chat_history_data
     )
 
-    # 2. Prepare the user content (User Content)
-    # Note: for the manual history we use types.Part explicitly
-    # to guarantee correct serialization if needed.
-
-    # Content for the immediate send (.send_message accepts a PIL Image or a string)
+    # 2. Prepare the user content
    send_contents = [message]
    if img_input:
        send_contents.append(Image.open(img_input))
 
-    # Content for the UI display
    user_display_text = message
    if img_input:
        user_display_text += "\n\n🖼️ *(Image attached)*"
@@ -205,7 +223,8 @@ def chat_respond(message, history, chat_history_data, img_input, model_ui, groun
     try:
         # 3. Send to the model
         response = chat.send_message(send_contents)
-        f_imgs, f_txt, t_imgs, t_txt = process_response(response)
+        # Retrieve the 5 values
+        f_imgs, f_txt, t_imgs, t_txt, sources = process_response(response)
 
         # 4. Build the UI response
         bot_messages = []
@@ -219,7 +238,11 @@ def chat_respond(message, history, chat_history_data, img_input, model_ui, groun
 
         if f_txt:
             bot_messages.append({"role": "assistant", "content": f_txt})
-
+
+        # <--- ADDED: display the sources in the chat
+        if sources:
+            bot_messages.append({"role": "assistant", "content": sources})
+
         if f_imgs:
             for i, img in enumerate(f_imgs):
                 unique_filename = f"chat_{uuid.uuid4()}_{i}.png"
@@ -228,14 +251,10 @@ def chat_respond(message, history, chat_history_data, img_input, model_ui, groun
                 img_msg = {"path": file_path, "alt_text": "Generated Image"}
                 bot_messages.append({"role": "assistant", "content": img_msg})
 
-        if not f_txt and not f_imgs and not t_txt:
+        if not f_txt and not f_imgs and not t_txt and not sources:
             bot_messages.append({"role": "assistant", "content": "⚠️ *Empty response.*"})
 
-        # 5. Update the Gemini history (CRITICAL PER THE DOCS)
-        # We build the Content objects manually for the State
-        # This mimics what the SDK would do internally if the Chat object persisted.
-
-        # A. Create the user Content
+        # 5. Update the Gemini history
         u_parts = [types.Part.from_text(text=message)]
         if img_input:
             with open(img_input, "rb") as f:
@@ -243,14 +262,11 @@ def chat_respond(message, history, chat_history_data, img_input, model_ui, groun
                 u_parts.append(types.Part.from_bytes(data=img_bytes, mime_type="image/jpeg"))
         user_content_obj = types.Content(role="user", parts=u_parts)
 
-        # B. Retrieve the model Content (contains the hidden thought signatures)
         model_content_obj = response.candidates[0].content
 
-        # C. Merge
         current_data = chat_history_data if chat_history_data else []
         new_gemini_history = current_data + [user_content_obj, model_content_obj]
 
-        # UI history
        new_ui_history = history + [user_message_obj] + bot_messages
 
        return "", new_ui_history, new_gemini_history, f_imgs
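The chat tab stays stateless between Gradio callbacks: each turn rebuilds the SDK chat from the list of types.Content objects held in chat_state, sends the new message, and appends both the user turn and the model's candidate content back to that list. Stripped of image handling and UI code, the round trip looks roughly like this (function and variable names are illustrative, not part of app.py):

from google.genai import types

def next_turn(cli, model_name, stored_history, user_text):
    # Rebuild the chat from the previously stored Content objects.
    chat = cli.chats.create(
        model=model_name,
        config=types.GenerateContentConfig(response_modalities=["TEXT", "IMAGE"]),
        history=stored_history or [],
    )
    response = chat.send_message([user_text])

    # Store both sides of the exchange for the next turn.
    user_content = types.Content(role="user", parts=[types.Part.from_text(text=user_text)])
    model_content = response.candidates[0].content
    return (stored_history or []) + [user_content, model_content], response

In app.py the returned list travels through gr.State (chat_state), so no Chat object needs to outlive a single callback.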
@@ -271,13 +287,14 @@ h1 { text-align: center; color: #4F46E5; font-size: 2.5em; }
 .image-container img { max-height: 400px; width: auto; }
 """
 
+# <--- FIX: 'css' and 'theme' removed here
 with gr.Blocks(title="Nano Vision Studio") as demo:
 
     gr.Markdown("# Nano 🍌 Vision Studio")
     gr.Markdown("### The Ultimate Interface: 4K Generation, Grounding, Multi-Image Composition & Iterative Chat")
 
     user_api_key_state = gr.State(os.environ.get("GOOGLE_API_KEY", ""))
-    chat_state = gr.State(None)  # Stores the list of types.Content
+    chat_state = gr.State(None)
 
     with gr.Tabs():
         # --- TAB 0 : API ---
@@ -306,15 +323,18 @@ with gr.Blocks(title="Nano Vision Studio") as demo:
 
                 with gr.Column(scale=2):
                     t1_gallery = gr.Gallery(label="Final Images", columns=2, height="auto")
+                    # <--- ADDED: component to display the HTML sources
+                    t1_sources = gr.HTML(label="Grounding Sources")
                     t1_text = gr.Markdown(label="Generated Text")
                     with gr.Accordion("🧠 Thought Process", open=False):
                         t1_thought_imgs = gr.Gallery(label="Visual Drafts", columns=4, height=150)
-                        t1_thought_txt = gr.Textbox(label="Thought Stream", interactive=False, lines=4)
+                        # t1_thought_txt = gr.Textbox(label="Thought Stream", interactive=False, lines=4)
+                        t1_thought_txt = gr.Markdown(label="Thought Stream")
 
             t1_btn.click(
                 generate_studio,
                 inputs=[t1_prompt, t1_model, t1_ratio, t1_res, t1_grounding, user_api_key_state],
-                outputs=[t1_gallery, t1_text, t1_thought_imgs, t1_thought_txt]
+                outputs=[t1_gallery, t1_text, t1_thought_imgs, t1_thought_txt, t1_sources]  # <--- Added t1_sources to the outputs
             )
 
             # --- TAB 2 : COMPOSITION ---
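The click wiring relies on positional matching: generate_studio returns five values and the outputs list names five components in the same order, with the new gr.HTML slot receiving the rendered search entry point. A self-contained sketch of that pattern, with a stub handler standing in for generate_studio:

import gradio as gr

def stub_generate(prompt):
    # Stands in for generate_studio: images, text, thought images, thought text, sources HTML.
    return [], f"Echo: {prompt}", [], "", "<div>No sources.</div>"

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    btn = gr.Button("Generate")
    gallery = gr.Gallery(label="Final Images", columns=2)
    sources = gr.HTML(label="Grounding Sources")
    text = gr.Markdown(label="Generated Text")
    thought_imgs = gr.Gallery(label="Visual Drafts", columns=4)
    thought_txt = gr.Markdown(label="Thought Stream")

    # Output order must match the handler's return order.
    btn.click(stub_generate,
              inputs=[prompt],
              outputs=[gallery, text, thought_imgs, thought_txt, sources])

if __name__ == "__main__":
    demo.launch()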
@@ -344,7 +364,7 @@ with gr.Blocks(title="Nano Vision Studio") as demo:
         with gr.TabItem("💬 Chat & Refinement"):
             with gr.Row():
                 with gr.Column(scale=2):
-                    # type="messages" is required for Gradio 6
+                    # <--- FIX: 'type="messages"' removed here
                     chat_history = gr.Chatbot(label="Session History", height=600)
                     with gr.Row():
                         chat_input = gr.Textbox(label="Your Message", scale=4)
@@ -386,6 +406,8 @@ with gr.Blocks(title="Nano Vision Studio") as demo:
 if __name__ == "__main__":
     threading.Thread(target=cleanup_old_files, daemon=True).start()
     demo.queue(default_concurrency_limit=20)
+
+    # <--- FIX: 'css' and 'theme' added here
     demo.launch(
         theme=gr.themes.Soft(),
         css=css,