File size: 20,375 Bytes
8285937 cc7516e 8285937 9e2af92 8285937 9e2af92 8285937 9e2af92 8285937 9e2af92 8285937 9e2af92 cc7516e 8285937 9e2af92 8285937 e2f7b5c 8285937 e2f7b5c 8285937 e2f7b5c 8285937 e2f7b5c 8285937 e2f7b5c 8285937 9e2af92 8285937 9e2af92 cc7516e 9e2af92 cc7516e 8285937 9e2af92 8285937 e2f7b5c 8285937 e2f7b5c 8285937 2f5cada 9e2af92 cc7516e 8285937 e2f7b5c 2f5cada 8285937 e2f7b5c 8285937 e2f7b5c 8285937 e2f7b5c 8285937 2f5cada 9e2af92 cc7516e 8285937 e2f7b5c 2f5cada e2f7b5c 2f5cada e2f7b5c 9e2af92 e2f7b5c 8285937 e2f7b5c 2f5cada 9e2af92 8285937 e2f7b5c 9e2af92 6205725 9e2af92 8285937 6205725 8285937 9e2af92 e2f7b5c 8285937 9e2af92 8285937 e2f7b5c 8285937 9e2af92 01ac878 8285937 e2f7b5c 9e2af92 8285937 e2f7b5c 9e2af92 6205725 9e2af92 2f5cada 8285937 2f5cada 8285937 9e2af92 8285937 e2f7b5c f734da8 8285937 cc7516e e2f7b5c 8285937 9e2af92 8285937 9e2af92 8285937 9e2af92 8285937 9e2af92 8285937 9e2af92 8285937 9e2af92 8285937 e2f7b5c 8285937 9e2af92 8285937 2f5cada e2f7b5c 8285937 cc7516e e2f7b5c 8285937 9e2af92 8285937 9e2af92 8285937 2f5cada 8285937 2f5cada 8285937 9e2af92 8285937 e2f7b5c f734da8 8285937 9e2af92 6205725 8285937 2f5cada 9e2af92 8285937 2f5cada 8285937 9e2af92 8285937 9e2af92 8285937 4083450 8285937 cc7516e 01ac878 e2f7b5c 01ac878 cc7516e 01ac878 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 |
import gradio as gr
from google import genai
from google.genai import types
from PIL import Image
import os
import io
import uuid
import threading
import time
from dotenv import load_dotenv
load_dotenv()
# --- CONFIGURATION (AS REQUIRED BY THE API DOCS) ---
# Maps the label shown in the model dropdowns to the model id sent to the API.
MODELS = {
    "🧠 Gemini 3 Pro Preview (Recommended)": "gemini-3-pro-image-preview",
    "⚡ Gemini 2.5 Flash (Fast)": "gemini-2.5-flash-image"
}
# Exact values required by the documentation
RATIOS = ["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"]
RESOLUTIONS = ["1K", "2K", "4K"]  # Uppercase 'K' mandatory per docs
# Directory that receives the images generated in the chat tab; created at import time.
TEMP_CHAT_DIR = "temp_chat_images"
os.makedirs(TEMP_CHAT_DIR, exist_ok=True)
# --- UTILS ---
def get_client(api_key):
    """Build a Gemini API client for the given key.

    Raises:
        gr.Error: if ``api_key`` is empty or otherwise falsy.
    """
    if api_key:
        return genai.Client(api_key=api_key)
    raise gr.Error("API Key manquante")
def safe_process_image(part):
    """Convert a response part into an image object, or return None.

    Non-empty inline bytes are decoded with PIL first. When the part carries
    no usable inline bytes, the part's own ``as_image()`` helper is tried
    instead, unwrapping its ``.image`` attribute when present. Any failure is
    logged to stdout and yields None.
    """
    try:
        inline = getattr(part, 'inline_data', None)
        raw = getattr(inline, 'data', None) if inline else None
        # Decode only when there are actual bytes: an empty/None payload would
        # raise inside Image.open and bypass the as_image() fallback below.
        if raw:
            return Image.open(io.BytesIO(raw))
        if hasattr(part, 'as_image'):
            img = part.as_image()
            if hasattr(img, 'image'):
                return img.image
            return img
        return None
    except Exception as e:
        print(f"⚠️ Image conversion error: {e}")
        return None
def process_response(response):
    """Split a model response into final output, thinking output and sources.

    Returns a 5-tuple ``(final_images, final_text, thought_images,
    thought_text, sources_html)``. Parts whose ``thought`` attribute is truthy
    feed the thinking buckets; every other part feeds the final ones. Each
    text fragment is followed by a newline. ``sources_html`` is the rendered
    search entry point found in the first candidate's grounding metadata, or
    None when there is none.
    """
    answer_images, answer_chunks = [], []
    draft_images, draft_chunks = [], []
    grounding_html = None

    parts = response.parts if response else None
    if not parts:
        return answer_images, "", draft_images, "", grounding_html

    print(f"\n--- RECEIVED ({len(parts)} parts) ---")

    # 1. Content parsing (images, text, thoughts)
    for part in parts:
        if getattr(part, 'thought', False):
            images, chunks = draft_images, draft_chunks
        else:
            images, chunks = answer_images, answer_chunks
        if part.text:
            chunks.append(part.text + "\n")
        if part.inline_data:
            decoded = safe_process_image(part)
            if decoded:
                images.append(decoded)

    # 2. Grounding sources: the metadata lives on the candidate, not on the parts
    candidates = response.candidates
    if candidates and candidates[0].grounding_metadata:
        entry_point = candidates[0].grounding_metadata.search_entry_point
        if entry_point and entry_point.rendered_content:
            grounding_html = entry_point.rendered_content

    return (
        answer_images,
        "".join(answer_chunks),
        draft_images,
        "".join(draft_chunks),
        grounding_html,
    )
# --- CLEANUP WORKER ---
def cleanup_old_files(max_age_seconds=3600, interval_seconds=600):
    """Periodically delete stale files from TEMP_CHAT_DIR; never returns.

    Each pass removes the regular files in TEMP_CHAT_DIR whose modification
    time is older than ``max_age_seconds``, then sleeps ``interval_seconds``
    before the next pass. Errors are logged to stdout and never propagate, so
    the loop keeps running.

    Args:
        max_age_seconds: Age threshold for deletion (default: 1 hour).
        interval_seconds: Pause between two passes (default: 10 minutes).
    """
    while True:
        try:
            cutoff = time.time() - max_age_seconds
            if os.path.exists(TEMP_CHAT_DIR):
                for filename in os.listdir(TEMP_CHAT_DIR):
                    filepath = os.path.join(TEMP_CHAT_DIR, filename)
                    try:
                        if os.path.isfile(filepath) and os.path.getmtime(filepath) < cutoff:
                            os.remove(filepath)
                            print(f"🧹 Supprimé : {filename}")
                    except OSError as e:
                        # A file can vanish or be locked between the listing and
                        # the removal: report it and keep sweeping the others.
                        print(f"⚠️ Suppression impossible ({filename}) : {e}")
        except Exception as e:
            print(f"⚠️ Erreur worker : {e}")
        time.sleep(interval_seconds)
# --- BACKEND FUNCTIONS ---
def update_api_key(new_key):
    """Validate the API key typed by the user and hand it back for storage.

    Surrounding whitespace (for instance a trailing newline picked up by a
    copy/paste) is stripped, so a blank-only entry is rejected instead of
    being stored as a key.

    Returns:
        A ``(status_message, key)`` pair. ``key`` is None when the input is
        empty or contains only whitespace.
    """
    cleaned = new_key.strip() if isinstance(new_key, str) else new_key
    if not cleaned:
        return "⚠️ Clé invalide", None
    return "✅ Clé enregistrée pour cette session !", cleaned
def generate_studio(prompt, model_ui, ratio, resolution, grounding, user_api_key):
    """Text-to-image generation built on ``types.GenerateContentConfig``.

    Args:
        prompt: Text description of the image to generate.
        model_ui: Key of ``MODELS`` selecting the model id.
        ratio: Aspect ratio forwarded as ``aspect_ratio``.
        resolution: Forwarded as ``image_size``, only when the model id
            contains "gemini-3".
        grounding: When truthy, the ``google_search`` tool is enabled.
        user_api_key: API key used to build the client.

    Returns:
        The 5-tuple produced by ``process_response``.

    Raises:
        gr.Error: if the key or the prompt is missing, or the API call fails.
    """
    cli = get_client(user_api_key)
    model_name = MODELS[model_ui]
    # Fail fast with a readable message instead of a round trip to the API.
    if not prompt or not prompt.strip():
        raise gr.Error("Prompt is empty.")

    # Strict configuration, as described by the docs
    img_conf = {"aspect_ratio": ratio}
    gen_conf = {"response_modalities": ["TEXT", "IMAGE"]}
    if "gemini-3" in model_name:
        img_conf["image_size"] = resolution
        # Ask the model to return its thinking parts
        gen_conf["thinking_config"] = types.ThinkingConfig(include_thoughts=True)
    if grounding:
        gen_conf["tools"] = [{"google_search": {}}]
    gen_conf["image_config"] = types.ImageConfig(**img_conf)

    try:
        print("🚀 Sending request [T2I]...")
        response = cli.models.generate_content(
            model=model_name,
            contents=[prompt],
            config=types.GenerateContentConfig(**gen_conf)
        )
        # Five elements are returned (sources included)
        return process_response(response)
    except Exception as e:
        # Chain the cause so the original traceback stays available in logs.
        raise gr.Error(f"API Error: {e}") from e
def generate_composition(prompt, files, model_ui, ratio, resolution, grounding, user_api_key):
    """Image-to-image composition from a prompt plus reference images.

    Args:
        prompt: Instructions sent ahead of the images.
        files: Paths of the reference images.
        model_ui: Key of ``MODELS`` selecting the model id.
        ratio: Aspect ratio forwarded as ``aspect_ratio``.
        resolution: Forwarded as ``image_size``, only when the model id
            contains "gemini-3".
        grounding: When truthy, the ``google_search`` tool is enabled.
        user_api_key: API key used to build the client.

    Returns:
        ``(final_images, text)`` where ``text`` is the final text followed by
        the grounding sources and the thought text when present.

    Raises:
        gr.Error: if the key is missing, no image is usable, or the API call fails.
    """
    cli = get_client(user_api_key)
    model_name = MODELS[model_ui]
    if not files:
        raise gr.Error("No input images provided.")

    contents = [prompt]
    for path in files:
        try:
            img = Image.open(path)
            # Image.open is lazy: load now so corrupt files are caught here and
            # the underlying file handle is not left open.
            img.load()
        except Exception as e:
            # Best effort: one unreadable file must not abort the request,
            # but it is reported instead of being dropped silently.
            print(f"⚠️ Skipping unreadable image {path}: {e}")
            continue
        contents.append(img)
    if len(contents) == 1:
        # Every file failed to load; do not send a prompt-only request.
        raise gr.Error("None of the provided images could be read.")

    img_conf = {"aspect_ratio": ratio}
    gen_conf = {"response_modalities": ["TEXT", "IMAGE"]}
    if "gemini-3" in model_name:
        img_conf["image_size"] = resolution
        # Ask the model to return its thinking parts here as well
        gen_conf["thinking_config"] = types.ThinkingConfig(include_thoughts=True)
    # Grounding
    if grounding:
        gen_conf["tools"] = [{"google_search": {}}]
    gen_conf["image_config"] = types.ImageConfig(**img_conf)

    try:
        print("🚀 Sending request [I2I]")
        response = cli.models.generate_content(
            model=model_name,
            contents=contents,
            config=types.GenerateContentConfig(**gen_conf)
        )
        # process_response returns five values
        f_imgs, f_txt, t_imgs, t_txt, sources = process_response(response)
        full_text = f_txt
        # This tab has a single text output: append sources and thoughts to it
        if sources:
            full_text += f"\n\n{sources}"
        if t_txt:
            full_text += f"\n\n{t_txt}"
        return f_imgs, full_text
    except Exception as e:
        # Chain the cause so the original traceback stays available in logs.
        raise gr.Error(f"Error: {e}") from e
# --- CHAT LOGIC ---
def chat_respond(message, history, chat_history_data, img_input, model_ui, grounding, ratio, resolution, user_api_key):
    """Handle one chat turn; the chat session is rebuilt on every call.

    Args:
        message: Text typed by the user.
        history: UI history (list of role/content message dicts).
        chat_history_data: Gemini-side history (list of ``types.Content``) or None.
        img_input: Paths of the attached images, or None.
        model_ui: Key of ``MODELS`` selecting the model id.
        grounding: When truthy, the ``google_search`` tool is enabled.
        ratio: Aspect ratio forwarded as ``aspect_ratio``.
        resolution: Forwarded as ``image_size``, only when the model id
            contains "gemini-3".
        user_api_key: API key used to build the client.

    Returns:
        ``("", None, ui_history, gemini_history, final_images)``. On failure
        the UI history gains an error message, the Gemini history is returned
        unchanged and the image list is empty.

    Raises:
        gr.Error: if the API key is missing.
    """
    import mimetypes  # local import: only this function needs it

    if not user_api_key:
        raise gr.Error("API Key manquante")
    cli = get_client(user_api_key)
    model_name = MODELS[model_ui]

    # Optional inputs may arrive as None; normalise them once.
    message = message or ""
    history = history or []
    img_paths = img_input or []

    tools = None
    thinking_conf = None
    # Image configuration
    img_conf = {"aspect_ratio": ratio}
    if "gemini-3" in model_name:
        img_conf["image_size"] = resolution
        # Ask the model to return its thinking parts
        thinking_conf = types.ThinkingConfig(include_thoughts=True)
    if grounding:
        tools = [{"google_search": {}}]

    # 1. Restore the conversation from the stored history
    chat = cli.chats.create(
        model=model_name,
        config=types.GenerateContentConfig(
            response_modalities=['TEXT', 'IMAGE'],
            tools=tools,
            thinking_config=thinking_conf,
            image_config=types.ImageConfig(**img_conf)
        ),
        history=chat_history_data
    )

    # 2. What the user sees for this turn
    user_display_text = message
    if img_paths:
        user_display_text += f"\n\n🖼️ *({len(img_paths)} Images attached)*"
    user_message_obj = {"role": "user", "content": user_display_text}

    try:
        # Images are loaded inside the try so an unreadable attachment becomes
        # a chat error message like any other failure of this turn.
        send_contents = [message]
        for img_path in img_paths:
            img = Image.open(img_path)
            img.load()  # Image.open is lazy; load now and release the file handle
            send_contents.append(img)

        # 3. Send to the model
        response = chat.send_message(send_contents)
        f_imgs, f_txt, t_imgs, t_txt, sources = process_response(response)

        # 4. Build the UI response
        bot_messages = []
        if t_txt or t_imgs:
            thought_md = "🧠 **Model Thought Process:**\n"
            if t_txt:
                thought_md += f"> {t_txt}\n"
            if t_imgs:
                thought_md += f"*( + {len(t_imgs)} draft image(s) not displayed)*\n"
            thought_md += "---\n"
            bot_messages.append({"role": "assistant", "content": thought_md})
        if f_txt:
            bot_messages.append({"role": "assistant", "content": f_txt})
        # Show the grounding sources in the chat
        if sources:
            bot_messages.append({"role": "assistant", "content": sources})
        if f_imgs:
            for i, img in enumerate(f_imgs):
                unique_filename = f"chat_{uuid.uuid4()}_{i}.png"
                file_path = os.path.join(TEMP_CHAT_DIR, unique_filename)
                img.save(file_path)
                img_msg = {"path": file_path, "alt_text": "Generated Image"}
                bot_messages.append({"role": "assistant", "content": img_msg})
        if not f_txt and not f_imgs and not t_txt and not sources:
            bot_messages.append({"role": "assistant", "content": "⚠️ *Empty response.*"})

        # 5. Update the Gemini-side history
        u_parts = [types.Part.from_text(text=message)]
        for img_path in img_paths:
            with open(img_path, "rb") as f:
                img_bytes = f.read()
            # Label the raw bytes with the file's real type; JPEG is only the
            # fallback when the type cannot be guessed from the name.
            mime_type = mimetypes.guess_type(img_path)[0] or "image/jpeg"
            u_parts.append(types.Part.from_bytes(data=img_bytes, mime_type=mime_type))
        user_content_obj = types.Content(role="user", parts=u_parts)
        model_content_obj = response.candidates[0].content

        current_data = chat_history_data if chat_history_data else []
        new_gemini_history = current_data + [user_content_obj, model_content_obj]
        new_ui_history = history + [user_message_obj] + bot_messages
        return "", None, new_ui_history, new_gemini_history, f_imgs
    except Exception as e:
        err_msg = f"❌ Error: {str(e)}"
        bot_err_obj = {"role": "assistant", "content": err_msg}
        return "", None, history + [user_message_obj, bot_err_obj], chat_history_data, []
def clear_chat():
    """Return the reset values for a new chat session.

    The tuple holds, in order: an empty UI history, no stored Gemini history,
    and an empty image gallery.
    """
    ui_history = []
    gemini_history = None
    gallery_images = []
    return ui_history, gemini_history, gallery_images
# --- GRADIO INTERFACE ---
# Custom stylesheet for the interface; handed to demo.launch(css=...) in the
# __main__ guard of this file.
css = """
.container { max-width: 1200px; margin: auto; }
h1 { text-align: center; color: #4F46E5; font-size: 2.5em; }
.image-container img { max-height: 400px; width: auto; }
"""
# Gradio interface: one Blocks app with five tabs (API settings, creation
# studio, composition, chat, guide).
# NOTE(review): the nesting of the layout containers below was reconstructed
# from a source whose indentation was lost — confirm against the original layout.
# FIX: 'css' and 'theme' are not passed to gr.Blocks here; they are given to demo.launch()
with gr.Blocks(title="Nano Vision Studio") as demo:
    gr.Markdown("# Nano 🍌 Vision Studio")
    gr.Markdown("### The Ultimate Interface: 4K Generation, Grounding, Multi-Image Composition & Iterative Chat")
    # Per-session state: the API key (seeded from GOOGLE_API_KEY) and the Gemini chat history.
    user_api_key_state = gr.State(os.environ.get("GOOGLE_API_KEY", ""))
    chat_state = gr.State(None)
    with gr.Tabs():
        # --- TAB 0 : API ---
        with gr.TabItem("🔑 API Settings"):
            gr.Markdown("### ⚙️ API Configuration")
            with gr.Row():
                with gr.Column(scale=3):
                    api_input = gr.Textbox(label="Google Gemini API Key", type="password", lines=1)
                with gr.Column(scale=1):
                    api_btn = gr.Button("Save & Initialize 💾", variant="primary")
            api_status = gr.Markdown()
            api_btn.click(update_api_key, inputs=[api_input], outputs=[api_status, user_api_key_state])
        # --- TAB 1 : STUDIO ---
        with gr.TabItem("🎨 Creation Studio"):
            with gr.Row():
                with gr.Column(scale=1):
                    t1_prompt = gr.Textbox(label="Prompt", lines=4, placeholder="Describe the scene...")
                    with gr.Group():
                        t1_model = gr.Dropdown(choices=list(MODELS.keys()), value="🧠 Gemini 3 Pro Preview (Recommended)", label="Model")
                        with gr.Row():
                            t1_ratio = gr.Dropdown(RATIOS, value="16:9", label="Aspect Ratio")
                            t1_res = gr.Dropdown(RESOLUTIONS, value="2K", label="Resolution (Pro only)")
                        t1_grounding = gr.Checkbox(label="Google Search (Grounding)")
                    t1_btn = gr.Button("Generate ✨", variant="primary", size="lg")
                with gr.Column(scale=2):
                    t1_gallery = gr.Gallery(label="Final Images", columns=2, height="auto")
                    # ADDED: component that displays the grounding sources HTML
                    t1_sources = gr.HTML(label="Grounding Sources")
                    t1_text = gr.Markdown(label="Generated Text")
                    with gr.Accordion("🧠 Thought Process", open=False):
                        t1_thought_imgs = gr.Gallery(label="Visual Drafts", columns=4, height=150)
                        # Markdown is used for a better rendering of the thought stream
                        t1_thought_txt = gr.Markdown(label="Thought Stream")
            # The output order matches the 5-tuple returned by generate_studio.
            t1_btn.click(
                generate_studio,
                inputs=[t1_prompt, t1_model, t1_ratio, t1_res, t1_grounding, user_api_key_state],
                outputs=[t1_gallery, t1_text, t1_thought_imgs, t1_thought_txt, t1_sources]  # ADDED: t1_sources in the outputs
            )
        # --- TAB 2 : COMPOSITION ---
        with gr.TabItem("🛠️ Composition"):
            with gr.Row():
                with gr.Column(scale=1):
                    t2_files = gr.File(label="Reference Images (Max 14)", file_count="multiple", type="filepath")
                    t2_prompt = gr.Textbox(label="Instructions", lines=3)
                    with gr.Accordion("Advanced Settings", open=False):
                        t2_model = gr.Dropdown(list(MODELS.keys()), value="🧠 Gemini 3 Pro Preview (Recommended)", label="Model")
                        with gr.Row():
                            t2_ratio = gr.Dropdown(RATIOS, value="1:1", label="Aspect Ratio")
                            t2_res = gr.Dropdown(RESOLUTIONS, value="1K", label="Output Resolution")
                        t2_grounding = gr.Checkbox(label="Google Search (Grounding)")  # ADDED: grounding
                    t2_btn = gr.Button("Run", variant="primary")
                with gr.Column(scale=2):
                    t2_gallery = gr.Gallery(label="Result", columns=1)
                    t2_text = gr.Markdown()
            t2_btn.click(
                generate_composition,
                inputs=[t2_prompt, t2_files, t2_model, t2_ratio, t2_res, t2_grounding, user_api_key_state],
                outputs=[t2_gallery, t2_text]
            )
        # --- TAB 3 : CHAT ---
        with gr.TabItem("💬 Chat & Refinement"):
            with gr.Row():
                with gr.Column(scale=2):
                    # FIX: 'type="messages"' removed here
                    chat_history = gr.Chatbot(label="Session History", height=600)
                    with gr.Row():
                        chat_input = gr.Textbox(label="Your Message", scale=4)
                        # chat_img = gr.Image(label="Input Image", type="filepath", height=100)
                        chat_img = gr.File(label="Attach Images (Max 14)", file_count="multiple", type="filepath", height=100)
                    with gr.Row():
                        chat_btn = gr.Button("Send", variant="primary")
                        clear_btn = gr.Button("🗑️ New Session")
                    with gr.Accordion("Chat Options", open=False):
                        c_model = gr.Dropdown(list(MODELS.keys()), value="🧠 Gemini 3 Pro Preview (Recommended)", label="Model")
                        with gr.Row():
                            c_ratio = gr.Dropdown(RATIOS, value="16:9", label="Aspect Ratio")  # ADDED: ratio
                            c_res = gr.Dropdown(RESOLUTIONS, value="2K", label="Resolution (Pro only)")  # ADDED: resolution
                        c_grounding = gr.Checkbox(label="Grounding")
                with gr.Column(scale=1):
                    chat_gallery_zoom = gr.Gallery(label="Zoom", columns=1, height="auto")
            # chat_respond returns values for: input box, attachments, UI history,
            # Gemini history state, zoom gallery.
            chat_btn.click(
                chat_respond,
                inputs=[chat_input, chat_history, chat_state, chat_img, c_model, c_grounding, c_ratio, c_res, user_api_key_state],
                outputs=[chat_input, chat_img, chat_history, chat_state, chat_gallery_zoom]
            )
            clear_btn.click(
                clear_chat,
                inputs=[],
                outputs=[chat_history, chat_state, chat_gallery_zoom]
            )
        # --- TAB 4 : GUIDE ---
        with gr.TabItem("📚 Guide"):
            gr.Markdown("""
# Comprehensive Guide
Welcome to the ultimate interface for **Nano Banana Pro** (Gemini 3 Pro) and **Nano Banana** (Gemini 2.5 Flash).
## 🚀 Choose Your Model
| Feature | ⚡ Gemini 2.5 Flash (Nano Banana) | 🧠 Gemini 3 Pro (Nano Banana Pro) |
| :--- | :--- | :--- |
| **Best For** | Speed, High Volume, Prototyping | Professional Assets, Complex Logic, Text Rendering |
| **Resolution** | 1024x1024 (Native) | Up to **4K** (High Fidelity) |
| **Inputs** | Text + Images | Text + up to **14 Reference Images** |
| **Special** | Fast & Efficient | **Thinking Process**, **Search Grounding** |
---
## ✨ Advanced Capabilities Explained
### 1. 🧠 The "Thinking" Process (Pro Only)
Nano Banana Pro doesn't just draw; it *thinks*. Before generating pixels, it reasons through your prompt to understand composition, lighting, and logic.
* **In this App:** Check the **"Thought Process"** accordion in the *Creation Studio* to read the model's internal monologue and see draft visualizations (Thought Images).
### 2. 🌍 Search Grounding (Real-Time Data)
The model isn't stuck in the past. It can access **Google Search** to generate images based on live data.
* **Try this:** "Visualize the current weather forecast for Tokyo as a modern chart."
* **In this App:** Enable the **"Grounding"** checkbox. Sources will appear below the generated image.
### 3. 🖼️ Advanced Composition (up to 14 Images)
While Flash handles fewer inputs, Pro can mix up to **14 images**!
* **Use Case:** Style transfer, maintaining character consistency, or complex collages.
* **How:** Use the **"Composition"** tab to upload multiple reference files.
---
## 💡 Prompting Masterclass
To get the best results, follow these professional tips:
* **Be Hyper-Specific:** Don't just say "a cat". Say *"A photorealistic close-up of a Siamese cat, golden hour lighting, captured with an 85mm lens"*.
* **Provide Context:** Explain the intent. *"Create a logo for a high-end minimalist skincare brand"*.
* **Positive Framing:** Instead of "no cars", describe the scene as *"an empty, deserted street"*.
* **Iterate with Chat:** Don't expect perfection on turn #1. Use the **Chat & Refinement** tab to say *"Make the lighting warmer"* or *"Add a wizard hat"*.
## ⚡ Performance Tips
* **4K Generation:** Available only on Pro. It costs more but delivers stunning print-quality results.
* **Aspect Ratios:** We support everything from **1:1** to **21:9** (Cinematic).
""")
if __name__ == "__main__":
    # Start the temp-file cleanup loop in a daemon thread so it never blocks
    # interpreter shutdown.
    janitor = threading.Thread(target=cleanup_old_files, daemon=True)
    janitor.start()

    demo.queue(default_concurrency_limit=20)

    # FIX: 'css' and 'theme' are supplied here, at launch time
    launch_options = {
        "theme": gr.themes.Soft(),
        "css": css,
        "max_threads": 40,
        "show_error": True,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo.launch(**launch_options)