Update app.py
Browse files
app.py
CHANGED
|
@@ -45,16 +45,18 @@ def safe_process_image(part):
|
|
| 45 |
return None
|
| 46 |
|
| 47 |
def process_response(response):
|
| 48 |
-
"""Sépare le résultat final
|
| 49 |
final_imgs, final_txt = [], ""
|
| 50 |
thought_imgs, thought_txt = [], ""
|
|
|
|
| 51 |
|
| 52 |
if not response or not response.parts:
|
| 53 |
-
return final_imgs, final_txt, thought_imgs, thought_txt
|
| 54 |
|
| 55 |
print(f"\n--- RECEIVED ({len(response.parts)} parts) ---")
|
|
|
|
|
|
|
| 56 |
for i, part in enumerate(response.parts):
|
| 57 |
-
# La doc mentionne l'attribut 'thought' pour le Gemini 3 Pro Thinking Mode
|
| 58 |
is_thought = getattr(part, 'thought', False)
|
| 59 |
|
| 60 |
if is_thought:
|
|
@@ -68,7 +70,14 @@ def process_response(response):
|
|
| 68 |
img = safe_process_image(part)
|
| 69 |
if img: final_imgs.append(img)
|
| 70 |
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
# --- WORKER NETTOYAGE ---
|
| 74 |
|
|
@@ -110,6 +119,8 @@ def generate_studio(prompt, model_ui, ratio, resolution, grounding, user_api_key
|
|
| 110 |
|
| 111 |
if "gemini-3" in model_name:
|
| 112 |
img_conf["image_size"] = resolution
|
|
|
|
|
|
|
| 113 |
if grounding:
|
| 114 |
gen_conf["tools"] = [{"google_search": {}}]
|
| 115 |
|
|
@@ -122,6 +133,7 @@ def generate_studio(prompt, model_ui, ratio, resolution, grounding, user_api_key
|
|
| 122 |
contents=[prompt],
|
| 123 |
config=types.GenerateContentConfig(**gen_conf)
|
| 124 |
)
|
|
|
|
| 125 |
return process_response(response)
|
| 126 |
except Exception as e:
|
| 127 |
raise gr.Error(f"API Error: {str(e)}")
|
|
@@ -136,7 +148,6 @@ def generate_composition(prompt, files, model_ui, ratio, resolution, user_api_ke
|
|
| 136 |
contents = [prompt]
|
| 137 |
for p in files:
|
| 138 |
try:
|
| 139 |
-
# La doc Python montre qu'on peut passer des objets PIL Image directement
|
| 140 |
contents.append(Image.open(p))
|
| 141 |
except: pass
|
| 142 |
|
|
@@ -145,6 +156,8 @@ def generate_composition(prompt, files, model_ui, ratio, resolution, user_api_ke
|
|
| 145 |
|
| 146 |
if "gemini-3" in model_name:
|
| 147 |
img_conf["image_size"] = resolution
|
|
|
|
|
|
|
| 148 |
|
| 149 |
gen_conf["image_config"] = types.ImageConfig(**img_conf)
|
| 150 |
|
|
@@ -155,14 +168,19 @@ def generate_composition(prompt, files, model_ui, ratio, resolution, user_api_ke
|
|
| 155 |
contents=contents,
|
| 156 |
config=types.GenerateContentConfig(**gen_conf)
|
| 157 |
)
|
| 158 |
-
|
|
|
|
|
|
|
| 159 |
full_text = f_txt
|
| 160 |
-
|
|
|
|
|
|
|
|
|
|
| 161 |
return f_imgs, full_text
|
| 162 |
except Exception as e:
|
| 163 |
raise gr.Error(f"Error: {str(e)}")
|
| 164 |
|
| 165 |
-
# --- CHAT LOGIC
|
| 166 |
|
| 167 |
def chat_respond(message, history, chat_history_data, img_input, model_ui, grounding, user_api_key):
|
| 168 |
"""Gestion du chat 'Stateless' conforme aux types Google GenAI"""
|
|
@@ -173,30 +191,30 @@ def chat_respond(message, history, chat_history_data, img_input, model_ui, groun
|
|
| 173 |
model_name = MODELS[model_ui]
|
| 174 |
|
| 175 |
tools = None
|
| 176 |
-
|
| 177 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
|
| 179 |
-
# 1. Restauration de l'historique
|
| 180 |
-
# On passe la liste brute des objets 'Content' stockés précédemment
|
| 181 |
chat = cli.chats.create(
|
| 182 |
model=model_name,
|
| 183 |
config=types.GenerateContentConfig(
|
| 184 |
response_modalities=['TEXT', 'IMAGE'],
|
| 185 |
-
tools=tools
|
|
|
|
| 186 |
),
|
| 187 |
history=chat_history_data
|
| 188 |
)
|
| 189 |
|
| 190 |
-
# 2. Préparation du contenu utilisateur
|
| 191 |
-
# Note: Pour l'historique manuel, on utilise des types.Part explicitement
|
| 192 |
-
# pour garantir la sérialisation correcte si nécessaire.
|
| 193 |
-
|
| 194 |
-
# Contenu pour l'envoi immédiat (la méthode .send_message accepte PIL Image ou String)
|
| 195 |
send_contents = [message]
|
| 196 |
if img_input:
|
| 197 |
send_contents.append(Image.open(img_input))
|
| 198 |
|
| 199 |
-
# Contenu pour l'affichage UI
|
| 200 |
user_display_text = message
|
| 201 |
if img_input:
|
| 202 |
user_display_text += "\n\n🖼️ *(Image attached)*"
|
|
@@ -205,7 +223,8 @@ def chat_respond(message, history, chat_history_data, img_input, model_ui, groun
|
|
| 205 |
try:
|
| 206 |
# 3. Envoi au modèle
|
| 207 |
response = chat.send_message(send_contents)
|
| 208 |
-
|
|
|
|
| 209 |
|
| 210 |
# 4. Construction réponse UI
|
| 211 |
bot_messages = []
|
|
@@ -219,7 +238,11 @@ def chat_respond(message, history, chat_history_data, img_input, model_ui, groun
|
|
| 219 |
|
| 220 |
if f_txt:
|
| 221 |
bot_messages.append({"role": "assistant", "content": f_txt})
|
| 222 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
if f_imgs:
|
| 224 |
for i, img in enumerate(f_imgs):
|
| 225 |
unique_filename = f"chat_{uuid.uuid4()}_{i}.png"
|
|
@@ -228,14 +251,10 @@ def chat_respond(message, history, chat_history_data, img_input, model_ui, groun
|
|
| 228 |
img_msg = {"path": file_path, "alt_text": "Generated Image"}
|
| 229 |
bot_messages.append({"role": "assistant", "content": img_msg})
|
| 230 |
|
| 231 |
-
if not f_txt and not f_imgs and not t_txt:
|
| 232 |
bot_messages.append({"role": "assistant", "content": "⚠️ *Empty response.*"})
|
| 233 |
|
| 234 |
-
# 5. Mise à jour de l'historique Gemini
|
| 235 |
-
# Nous construisons manuellement les objets Content pour le State
|
| 236 |
-
# Cela simule ce que le SDK ferait en interne si l'objet Chat persistait.
|
| 237 |
-
|
| 238 |
-
# A. Création Content User
|
| 239 |
u_parts = [types.Part.from_text(text=message)]
|
| 240 |
if img_input:
|
| 241 |
with open(img_input, "rb") as f:
|
|
@@ -243,14 +262,11 @@ def chat_respond(message, history, chat_history_data, img_input, model_ui, groun
|
|
| 243 |
u_parts.append(types.Part.from_bytes(data=img_bytes, mime_type="image/jpeg"))
|
| 244 |
user_content_obj = types.Content(role="user", parts=u_parts)
|
| 245 |
|
| 246 |
-
# B. Récupération Content Model (Contient les signatures de pensée cachées)
|
| 247 |
model_content_obj = response.candidates[0].content
|
| 248 |
|
| 249 |
-
# C. Fusion
|
| 250 |
current_data = chat_history_data if chat_history_data else []
|
| 251 |
new_gemini_history = current_data + [user_content_obj, model_content_obj]
|
| 252 |
|
| 253 |
-
# Historique UI
|
| 254 |
new_ui_history = history + [user_message_obj] + bot_messages
|
| 255 |
|
| 256 |
return "", new_ui_history, new_gemini_history, f_imgs
|
|
@@ -271,13 +287,14 @@ h1 { text-align: center; color: #4F46E5; font-size: 2.5em; }
|
|
| 271 |
.image-container img { max-height: 400px; width: auto; }
|
| 272 |
"""
|
| 273 |
|
|
|
|
| 274 |
with gr.Blocks(title="Nano Vision Studio") as demo:
|
| 275 |
|
| 276 |
gr.Markdown("# Nano 🍌 Vision Studio")
|
| 277 |
gr.Markdown("### The Ultimate Interface: 4K Generation, Grounding, Multi-Image Composition & Iterative Chat")
|
| 278 |
|
| 279 |
user_api_key_state = gr.State(os.environ.get("GOOGLE_API_KEY", ""))
|
| 280 |
-
chat_state = gr.State(None)
|
| 281 |
|
| 282 |
with gr.Tabs():
|
| 283 |
# --- TAB 0 : API ---
|
|
@@ -306,15 +323,18 @@ with gr.Blocks(title="Nano Vision Studio") as demo:
|
|
| 306 |
|
| 307 |
with gr.Column(scale=2):
|
| 308 |
t1_gallery = gr.Gallery(label="Final Images", columns=2, height="auto")
|
|
|
|
|
|
|
| 309 |
t1_text = gr.Markdown(label="Generated Text")
|
| 310 |
with gr.Accordion("🧠 Thought Process", open=False):
|
| 311 |
t1_thought_imgs = gr.Gallery(label="Visual Drafts", columns=4, height=150)
|
| 312 |
-
t1_thought_txt = gr.Textbox(label="Thought Stream", interactive=False, lines=4)
|
|
|
|
| 313 |
|
| 314 |
t1_btn.click(
|
| 315 |
generate_studio,
|
| 316 |
inputs=[t1_prompt, t1_model, t1_ratio, t1_res, t1_grounding, user_api_key_state],
|
| 317 |
-
outputs=[t1_gallery, t1_text, t1_thought_imgs, t1_thought_txt]
|
| 318 |
)
|
| 319 |
|
| 320 |
# --- TAB 2 : COMPOSITION ---
|
|
@@ -344,7 +364,7 @@ with gr.Blocks(title="Nano Vision Studio") as demo:
|
|
| 344 |
with gr.TabItem("💬 Chat & Refinement"):
|
| 345 |
with gr.Row():
|
| 346 |
with gr.Column(scale=2):
|
| 347 |
-
# type="messages"
|
| 348 |
chat_history = gr.Chatbot(label="Session History", height=600)
|
| 349 |
with gr.Row():
|
| 350 |
chat_input = gr.Textbox(label="Your Message", scale=4)
|
|
@@ -386,6 +406,8 @@ with gr.Blocks(title="Nano Vision Studio") as demo:
|
|
| 386 |
if __name__ == "__main__":
|
| 387 |
threading.Thread(target=cleanup_old_files, daemon=True).start()
|
| 388 |
demo.queue(default_concurrency_limit=20)
|
|
|
|
|
|
|
| 389 |
demo.launch(
|
| 390 |
theme=gr.themes.Soft(),
|
| 391 |
css=css,
|
|
|
|
| 45 |
return None
|
| 46 |
|
| 47 |
def process_response(response):
|
| 48 |
+
"""Sépare le résultat final, le processus de pensée (Thinking Mode) et les sources."""
|
| 49 |
final_imgs, final_txt = [], ""
|
| 50 |
thought_imgs, thought_txt = [], ""
|
| 51 |
+
sources_html = None # <--- AJOUT : Conteneur pour les sources
|
| 52 |
|
| 53 |
if not response or not response.parts:
|
| 54 |
+
return final_imgs, final_txt, thought_imgs, thought_txt, sources_html
|
| 55 |
|
| 56 |
print(f"\n--- RECEIVED ({len(response.parts)} parts) ---")
|
| 57 |
+
|
| 58 |
+
# 1. Parsing du contenu (Images & Texte & Pensées)
|
| 59 |
for i, part in enumerate(response.parts):
|
|
|
|
| 60 |
is_thought = getattr(part, 'thought', False)
|
| 61 |
|
| 62 |
if is_thought:
|
|
|
|
| 70 |
img = safe_process_image(part)
|
| 71 |
if img: final_imgs.append(img)
|
| 72 |
|
| 73 |
+
# 2. Parsing du Grounding (Sources) - AJOUT
|
| 74 |
+
# Les métadonnées sont au niveau du 'candidate', pas des 'parts'
|
| 75 |
+
if response.candidates and response.candidates[0].grounding_metadata:
|
| 76 |
+
gm = response.candidates[0].grounding_metadata
|
| 77 |
+
if gm.search_entry_point and gm.search_entry_point.rendered_content:
|
| 78 |
+
sources_html = gm.search_entry_point.rendered_content
|
| 79 |
+
|
| 80 |
+
return final_imgs, final_txt, thought_imgs, thought_txt, sources_html
|
| 81 |
|
| 82 |
# --- WORKER NETTOYAGE ---
|
| 83 |
|
|
|
|
| 119 |
|
| 120 |
if "gemini-3" in model_name:
|
| 121 |
img_conf["image_size"] = resolution
|
| 122 |
+
# <--- AJOUT : Activation du Thinking Mode
|
| 123 |
+
gen_conf["thinking_config"] = types.ThinkingConfig(include_thoughts=True)
|
| 124 |
if grounding:
|
| 125 |
gen_conf["tools"] = [{"google_search": {}}]
|
| 126 |
|
|
|
|
| 133 |
contents=[prompt],
|
| 134 |
config=types.GenerateContentConfig(**gen_conf)
|
| 135 |
)
|
| 136 |
+
# Retourne maintenant 5 éléments (avec sources)
|
| 137 |
return process_response(response)
|
| 138 |
except Exception as e:
|
| 139 |
raise gr.Error(f"API Error: {str(e)}")
|
|
|
|
| 148 |
contents = [prompt]
|
| 149 |
for p in files:
|
| 150 |
try:
|
|
|
|
| 151 |
contents.append(Image.open(p))
|
| 152 |
except: pass
|
| 153 |
|
|
|
|
| 156 |
|
| 157 |
if "gemini-3" in model_name:
|
| 158 |
img_conf["image_size"] = resolution
|
| 159 |
+
# <--- AJOUT : Activation du Thinking Mode aussi ici par sécurité
|
| 160 |
+
gen_conf["thinking_config"] = types.ThinkingConfig(include_thoughts=True)
|
| 161 |
|
| 162 |
gen_conf["image_config"] = types.ImageConfig(**img_conf)
|
| 163 |
|
|
|
|
| 168 |
contents=contents,
|
| 169 |
config=types.GenerateContentConfig(**gen_conf)
|
| 170 |
)
|
| 171 |
+
# Gestion des 5 valeurs de retour
|
| 172 |
+
f_imgs, f_txt, t_imgs, t_txt, sources = process_response(response)
|
| 173 |
+
|
| 174 |
full_text = f_txt
|
| 175 |
+
# Ajout des sources et pensées au texte principal pour cet onglet
|
| 176 |
+
if sources: full_text += f"\n\n{sources}"
|
| 177 |
+
if t_txt: full_text += f"\n\n{t_txt}"
|
| 178 |
+
|
| 179 |
return f_imgs, full_text
|
| 180 |
except Exception as e:
|
| 181 |
raise gr.Error(f"Error: {str(e)}")
|
| 182 |
|
| 183 |
+
# --- CHAT LOGIC ---
|
| 184 |
|
| 185 |
def chat_respond(message, history, chat_history_data, img_input, model_ui, grounding, user_api_key):
|
| 186 |
"""Gestion du chat 'Stateless' conforme aux types Google GenAI"""
|
|
|
|
| 191 |
model_name = MODELS[model_ui]
|
| 192 |
|
| 193 |
tools = None
|
| 194 |
+
thinking_conf = None # <--- AJOUT variable
|
| 195 |
+
|
| 196 |
+
if "gemini-3" in model_name:
|
| 197 |
+
# <--- AJOUT : Config Thinking
|
| 198 |
+
thinking_conf = types.ThinkingConfig(include_thoughts=True)
|
| 199 |
+
if grounding:
|
| 200 |
+
tools = [{"google_search": {}}]
|
| 201 |
|
| 202 |
+
# 1. Restauration de l'historique
|
|
|
|
| 203 |
chat = cli.chats.create(
|
| 204 |
model=model_name,
|
| 205 |
config=types.GenerateContentConfig(
|
| 206 |
response_modalities=['TEXT', 'IMAGE'],
|
| 207 |
+
tools=tools,
|
| 208 |
+
thinking_config=thinking_conf # <--- AJOUT
|
| 209 |
),
|
| 210 |
history=chat_history_data
|
| 211 |
)
|
| 212 |
|
| 213 |
+
# 2. Préparation du contenu utilisateur
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
send_contents = [message]
|
| 215 |
if img_input:
|
| 216 |
send_contents.append(Image.open(img_input))
|
| 217 |
|
|
|
|
| 218 |
user_display_text = message
|
| 219 |
if img_input:
|
| 220 |
user_display_text += "\n\n🖼️ *(Image attached)*"
|
|
|
|
| 223 |
try:
|
| 224 |
# 3. Envoi au modèle
|
| 225 |
response = chat.send_message(send_contents)
|
| 226 |
+
# Récupération des 5 valeurs
|
| 227 |
+
f_imgs, f_txt, t_imgs, t_txt, sources = process_response(response)
|
| 228 |
|
| 229 |
# 4. Construction réponse UI
|
| 230 |
bot_messages = []
|
|
|
|
| 238 |
|
| 239 |
if f_txt:
|
| 240 |
bot_messages.append({"role": "assistant", "content": f_txt})
|
| 241 |
+
|
| 242 |
+
# <--- AJOUT : Affichage des sources dans le chat
|
| 243 |
+
if sources:
|
| 244 |
+
bot_messages.append({"role": "assistant", "content": sources})
|
| 245 |
+
|
| 246 |
if f_imgs:
|
| 247 |
for i, img in enumerate(f_imgs):
|
| 248 |
unique_filename = f"chat_{uuid.uuid4()}_{i}.png"
|
|
|
|
| 251 |
img_msg = {"path": file_path, "alt_text": "Generated Image"}
|
| 252 |
bot_messages.append({"role": "assistant", "content": img_msg})
|
| 253 |
|
| 254 |
+
if not f_txt and not f_imgs and not t_txt and not sources:
|
| 255 |
bot_messages.append({"role": "assistant", "content": "⚠️ *Empty response.*"})
|
| 256 |
|
| 257 |
+
# 5. Mise à jour de l'historique Gemini
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
u_parts = [types.Part.from_text(text=message)]
|
| 259 |
if img_input:
|
| 260 |
with open(img_input, "rb") as f:
|
|
|
|
| 262 |
u_parts.append(types.Part.from_bytes(data=img_bytes, mime_type="image/jpeg"))
|
| 263 |
user_content_obj = types.Content(role="user", parts=u_parts)
|
| 264 |
|
|
|
|
| 265 |
model_content_obj = response.candidates[0].content
|
| 266 |
|
|
|
|
| 267 |
current_data = chat_history_data if chat_history_data else []
|
| 268 |
new_gemini_history = current_data + [user_content_obj, model_content_obj]
|
| 269 |
|
|
|
|
| 270 |
new_ui_history = history + [user_message_obj] + bot_messages
|
| 271 |
|
| 272 |
return "", new_ui_history, new_gemini_history, f_imgs
|
|
|
|
| 287 |
.image-container img { max-height: 400px; width: auto; }
|
| 288 |
"""
|
| 289 |
|
| 290 |
+
# <--- CORRECTION : Suppression de 'css' et 'theme' ici
|
| 291 |
with gr.Blocks(title="Nano Vision Studio") as demo:
|
| 292 |
|
| 293 |
gr.Markdown("# Nano 🍌 Vision Studio")
|
| 294 |
gr.Markdown("### The Ultimate Interface: 4K Generation, Grounding, Multi-Image Composition & Iterative Chat")
|
| 295 |
|
| 296 |
user_api_key_state = gr.State(os.environ.get("GOOGLE_API_KEY", ""))
|
| 297 |
+
chat_state = gr.State(None)
|
| 298 |
|
| 299 |
with gr.Tabs():
|
| 300 |
# --- TAB 0 : API ---
|
|
|
|
| 323 |
|
| 324 |
with gr.Column(scale=2):
|
| 325 |
t1_gallery = gr.Gallery(label="Final Images", columns=2, height="auto")
|
| 326 |
+
# <--- AJOUT : Composant pour afficher les sources HTML
|
| 327 |
+
t1_sources = gr.HTML(label="Grounding Sources")
|
| 328 |
t1_text = gr.Markdown(label="Generated Text")
|
| 329 |
with gr.Accordion("🧠 Thought Process", open=False):
|
| 330 |
t1_thought_imgs = gr.Gallery(label="Visual Drafts", columns=4, height=150)
|
| 331 |
+
# t1_thought_txt = gr.Textbox(label="Thought Stream", interactive=False, lines=4)
|
| 332 |
+
t1_thought_txt = gr.Markdown(label="Thought Stream")
|
| 333 |
|
| 334 |
t1_btn.click(
|
| 335 |
generate_studio,
|
| 336 |
inputs=[t1_prompt, t1_model, t1_ratio, t1_res, t1_grounding, user_api_key_state],
|
| 337 |
+
outputs=[t1_gallery, t1_text, t1_thought_imgs, t1_thought_txt, t1_sources] # <--- Ajout t1_sources dans les outputs
|
| 338 |
)
|
| 339 |
|
| 340 |
# --- TAB 2 : COMPOSITION ---
|
|
|
|
| 364 |
with gr.TabItem("💬 Chat & Refinement"):
|
| 365 |
with gr.Row():
|
| 366 |
with gr.Column(scale=2):
|
| 367 |
+
# <--- CORRECTION : Suppression de 'type="messages"' ici
|
| 368 |
chat_history = gr.Chatbot(label="Session History", height=600)
|
| 369 |
with gr.Row():
|
| 370 |
chat_input = gr.Textbox(label="Your Message", scale=4)
|
|
|
|
| 406 |
if __name__ == "__main__":
|
| 407 |
threading.Thread(target=cleanup_old_files, daemon=True).start()
|
| 408 |
demo.queue(default_concurrency_limit=20)
|
| 409 |
+
|
| 410 |
+
# <--- CORRECTION : Ajout de 'css' et 'theme' ici
|
| 411 |
demo.launch(
|
| 412 |
theme=gr.themes.Soft(),
|
| 413 |
css=css,
|