|
|
import gradio as gr |
|
|
from google import genai |
|
|
from google.genai import types |
|
|
from PIL import Image |
|
|
import os |
|
|
import io |
|
|
import uuid |
|
|
import threading |
|
|
import time |
|
|
from dotenv import load_dotenv |
|
|
|
|
|
load_dotenv() |
|
|
|
|
|
|
|
|
# UI label -> Gemini model identifier.
# The emoji labels are shown in every model dropdown; the values are the
# model names actually sent to the GenAI API.
MODELS = {
    "🧠 Gemini 3 Pro Preview (Recommended)": "gemini-3-pro-image-preview",
    "⚡ Gemini 2.5 Flash (Fast)": "gemini-2.5-flash-image"
}
|
|
|
|
|
|
|
|
# Aspect ratios offered in the UI (width:height strings passed to ImageConfig).
RATIOS = ["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"]

# Output resolutions; "2K"/"4K" are only applied for the Gemini 3 Pro model.
RESOLUTIONS = ["1K", "2K", "4K"]

# Scratch directory for images generated in the chat tab. A background
# worker (cleanup_old_files) purges files older than one hour.
TEMP_CHAT_DIR = "temp_chat_images"
os.makedirs(TEMP_CHAT_DIR, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
def get_client(api_key):
    """Build a GenAI client for the given key, failing fast when it is empty."""
    if not api_key:
        raise gr.Error("API Key manquante")
    return genai.Client(api_key=api_key)
|
|
|
|
|
def safe_process_image(part):
    """Convert a response part's raw inline data into a PIL Image.

    Tries the raw ``inline_data.data`` bytes first, then falls back to the
    SDK's ``as_image()`` helper. Returns None when the part carries no image
    or conversion fails (the error is logged, never raised).
    """
    try:
        blob = part.inline_data
        if blob and hasattr(blob, 'data'):
            return Image.open(io.BytesIO(blob.data))
        if hasattr(part, 'as_image'):
            candidate = part.as_image()
            # Some SDK versions wrap the PIL image in an object with an
            # ``.image`` attribute; unwrap it when present.
            return getattr(candidate, 'image', candidate)
        return None
    except Exception as exc:
        print(f"⚠️ Image conversion error: {exc}")
        return None
|
|
|
|
|
def process_response(response):
    """Split a GenAI response into final output, thinking output and sources.

    Returns a 5-tuple ``(final_images, final_text, thought_images,
    thought_text, sources_html)``. Text parts are concatenated with trailing
    newlines; image parts are converted through safe_process_image. Parts
    flagged ``thought`` belong to the model's reasoning stream ("Thinking
    Mode") and are kept separate from the final answer.
    """
    final_imgs, thought_imgs = [], []
    final_pieces, thought_pieces = [], []
    sources_html = None

    if not response or not response.parts:
        return final_imgs, "", thought_imgs, "", sources_html

    print(f"\n--- RECEIVED ({len(response.parts)} parts) ---")

    for part in response.parts:
        is_thought = getattr(part, 'thought', False)
        # Route each part into the thought buckets or the final buckets.
        text_bucket = thought_pieces if is_thought else final_pieces
        img_bucket = thought_imgs if is_thought else final_imgs
        if part.text:
            text_bucket.append(part.text + "\n")
        if part.inline_data:
            converted = safe_process_image(part)
            if converted:
                img_bucket.append(converted)

    # Grounding (Google Search) sources arrive as pre-rendered HTML.
    candidates = response.candidates
    if candidates and candidates[0].grounding_metadata:
        entry = candidates[0].grounding_metadata.search_entry_point
        if entry and entry.rendered_content:
            sources_html = entry.rendered_content

    return final_imgs, "".join(final_pieces), thought_imgs, "".join(thought_pieces), sources_html
|
|
|
|
|
|
|
|
|
|
|
def cleanup_old_files():
    """Background janitor: every 10 minutes, delete temp chat images older than 1 hour.

    Runs forever; intended to be started as a daemon thread (see the
    ``__main__`` guard). All errors are logged and swallowed so the worker
    never dies.
    """
    while True:
        try:
            cutoff = time.time() - 3600  # anything older than one hour goes
            if os.path.exists(TEMP_CHAT_DIR):
                for filename in os.listdir(TEMP_CHAT_DIR):
                    filepath = os.path.join(TEMP_CHAT_DIR, filename)
                    if os.path.isfile(filepath) and os.path.getmtime(filepath) < cutoff:
                        try:
                            os.remove(filepath)
                            # Bug fix: this log line previously printed a
                            # literal placeholder instead of the file name.
                            print(f"🧹 Supprimé : {filename}")
                        except OSError:
                            # Best-effort: the file may have been removed
                            # concurrently; ignore and move on.
                            pass
        except Exception as e:
            print(f"⚠️ Erreur worker : {e}")
        time.sleep(600)
|
|
|
|
|
|
|
|
|
|
|
def update_api_key(new_key):
    """Store the user's API key in session state.

    Returns a (status_markdown, key_or_None) pair for the status display and
    the gr.State holding the key.
    """
    if new_key:
        return "✅ Clé enregistrée pour cette session !", new_key
    return "⚠️ Clé invalide", None
|
|
|
|
|
def generate_studio(prompt, model_ui, ratio, resolution, grounding, user_api_key):
    """Standard text-to-image generation built on types.GenerateContentConfig.

    Returns the 5-tuple produced by process_response, wired straight into
    the Creation Studio outputs (gallery, text, thought gallery, thought
    text, grounding sources).
    """
    client = get_client(user_api_key)
    model_name = MODELS[model_ui]

    image_settings = {"aspect_ratio": ratio}
    request_config = {"response_modalities": ["TEXT", "IMAGE"]}

    # Pro-only features: explicit output size, the thinking stream, and
    # Google Search grounding.
    if "gemini-3" in model_name:
        image_settings["image_size"] = resolution

        request_config["thinking_config"] = types.ThinkingConfig(include_thoughts=True)
        if grounding:
            request_config["tools"] = [{"google_search": {}}]

    request_config["image_config"] = types.ImageConfig(**image_settings)

    try:
        print(f"🚀 Sending request [T2I]...")
        api_response = client.models.generate_content(
            model=model_name,
            contents=[prompt],
            config=types.GenerateContentConfig(**request_config),
        )
        return process_response(api_response)
    except Exception as e:
        raise gr.Error(f"API Error: {str(e)}")
|
|
|
|
|
def generate_composition(prompt, files, model_ui, ratio, resolution, grounding, user_api_key):
    """Image-to-image composition (the API accepts up to 14 reference images).

    Args:
        prompt: Textual instructions for the composition.
        files: List of local file paths for the reference images.
        model_ui: UI label resolved through MODELS.
        ratio: Aspect ratio string (e.g. "1:1").
        resolution: Output size; only applied on the Gemini 3 Pro model.
        grounding: Enable Google Search grounding (Pro-only).
        user_api_key: Gemini API key from session state.

    Returns:
        (images, markdown_text) for the gallery and markdown outputs.

    Raises:
        gr.Error: when no images are given or the API call fails.
    """
    cli = get_client(user_api_key)
    model_name = MODELS[model_ui]

    if not files: raise gr.Error("No input images provided.")

    contents = [prompt]
    for p in files:
        try:
            contents.append(Image.open(p))
        except Exception as e:
            # Bug fix: this was a bare `except: pass`, which silently dropped
            # unreadable files (and would even swallow KeyboardInterrupt).
            # Keep the best-effort behavior but log what was skipped.
            print(f"⚠️ Skipping unreadable image {p}: {e}")

    img_conf = {"aspect_ratio": ratio}
    gen_conf = {"response_modalities": ["TEXT", "IMAGE"]}

    # Pro-only features: explicit output size, thinking stream, grounding.
    if "gemini-3" in model_name:
        img_conf["image_size"] = resolution

        gen_conf["thinking_config"] = types.ThinkingConfig(include_thoughts=True)

        if grounding:
            gen_conf["tools"] = [{"google_search": {}}]

    gen_conf["image_config"] = types.ImageConfig(**img_conf)

    try:
        print(f"🚀 Sending request [I2I]")
        response = cli.models.generate_content(
            model=model_name,
            contents=contents,
            config=types.GenerateContentConfig(**gen_conf)
        )

        f_imgs, f_txt, t_imgs, t_txt, sources = process_response(response)

        # Fold sources and thought text into one markdown blob; thought
        # images are intentionally not displayed in this tab.
        full_text = f_txt

        if sources: full_text += f"\n\n{sources}"
        if t_txt: full_text += f"\n\n{t_txt}"

        return f_imgs, full_text
    except Exception as e:
        raise gr.Error(f"Error: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
def chat_respond(message, history, chat_history_data, img_input, model_ui, grounding, ratio, resolution, user_api_key):
    """Handle one 'stateless' chat turn using Google GenAI types.

    A fresh chat session is rebuilt from ``chat_history_data`` (a list of
    types.Content held in gr.State) on every call, the user's turn is sent,
    and both the Gradio-facing history and the Gemini-native history are
    returned so client state stays the single source of truth.

    Returns a 5-tuple matching the click() outputs:
        (cleared_textbox, cleared_file_input, ui_history, gemini_history,
         final_images_for_zoom_gallery)
    """

    if not user_api_key: raise gr.Error("API Key manquante")

    cli = get_client(user_api_key)
    model_name = MODELS[model_ui]

    # Pro-only options stay None for the Flash model.
    tools = None
    thinking_conf = None

    img_conf = {"aspect_ratio": ratio}

    if "gemini-3" in model_name:
        img_conf["image_size"] = resolution

        thinking_conf = types.ThinkingConfig(include_thoughts=True)
        if grounding:
            tools = [{"google_search": {}}]

    # Recreate the chat server-side from the stored Gemini-native history.
    chat = cli.chats.create(
        model=model_name,
        config=types.GenerateContentConfig(
            response_modalities=['TEXT', 'IMAGE'],
            tools=tools,
            thinking_config=thinking_conf,
            image_config=types.ImageConfig(**img_conf)
        ),
        history=chat_history_data
    )

    # Payload actually sent to the model: the text plus any attached images.
    send_contents = [message]
    if img_input:
        for img_path in img_input:
            send_contents.append(Image.open(img_path))

    # What the user's bubble shows in the chatbot UI.
    user_display_text = message
    if img_input:
        user_display_text += f"\n\n🖼️ *({len(img_input)} Images attached)*"
    user_message_obj = {"role": "user", "content": user_display_text}

    try:
        response = chat.send_message(send_contents)

        f_imgs, f_txt, t_imgs, t_txt, sources = process_response(response)

        bot_messages = []

        # Thinking-mode transcript is rendered first, as its own bubble.
        if t_txt or t_imgs:
            thought_md = "🧠 **Model Thought Process:**\n"
            if t_txt: thought_md += f"> {t_txt}\n"
            if t_imgs: thought_md += f"*( + {len(t_imgs)} draft image(s) not displayed)*\n"
            thought_md += "---\n"
            bot_messages.append({"role": "assistant", "content": thought_md})

        if f_txt:
            bot_messages.append({"role": "assistant", "content": f_txt})

        if sources:
            bot_messages.append({"role": "assistant", "content": sources})

        # Final images are written to disk so the chatbot can serve them;
        # cleanup_old_files purges them after an hour.
        if f_imgs:
            for i, img in enumerate(f_imgs):
                unique_filename = f"chat_{uuid.uuid4()}_{i}.png"
                file_path = os.path.join(TEMP_CHAT_DIR, unique_filename)
                img.save(file_path)
                img_msg = {"path": file_path, "alt_text": "Generated Image"}
                bot_messages.append({"role": "assistant", "content": img_msg})

        if not f_txt and not f_imgs and not t_txt and not sources:
            bot_messages.append({"role": "assistant", "content": "⚠️ *Empty response.*"})

        # Rebuild the Gemini-native Content for the user's turn so the next
        # call can replay the full conversation.
        u_parts = [types.Part.from_text(text=message)]
        if img_input:
            for img_path in img_input:
                with open(img_path, "rb") as f:
                    img_bytes = f.read()
                # NOTE(review): the mime type is hard-coded to JPEG even for
                # PNG/WebP uploads — confirm the API tolerates the mismatch.
                u_parts.append(types.Part.from_bytes(data=img_bytes, mime_type="image/jpeg"))
        user_content_obj = types.Content(role="user", parts=u_parts)

        model_content_obj = response.candidates[0].content

        current_data = chat_history_data if chat_history_data else []
        new_gemini_history = current_data + [user_content_obj, model_content_obj]

        new_ui_history = history + [user_message_obj] + bot_messages

        return "", None, new_ui_history, new_gemini_history, f_imgs

    except Exception as e:
        # On failure: show the error as a bot bubble and leave the
        # Gemini-native history unchanged.
        err_msg = f"❌ Error: {str(e)}"
        bot_err_obj = {"role": "assistant", "content": err_msg}
        return "", None, history + [user_message_obj, bot_err_obj], chat_history_data, []
|
|
|
|
|
def clear_chat():
    """Start a fresh session: empty UI history, no Gemini history, empty zoom gallery."""
    fresh_ui, fresh_state, fresh_gallery = [], None, []
    return fresh_ui, fresh_state, fresh_gallery
|
|
|
|
|
|
|
|
|
|
|
# Global CSS injected into the Gradio app: page width cap, title styling,
# and a max height for displayed images.
css = """
.container { max-width: 1200px; margin: auto; }
h1 { text-align: center; color: #4F46E5; font-size: 2.5em; }
.image-container img { max-height: 400px; width: auto; }
"""
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# UI layout. Fixes vs. the original:
#   * `theme` and `css` are gr.Blocks() constructor options, so they are
#     applied here (Blocks.launch() does not accept them).
#   * gr.Chatbot is created with type="messages" because chat_respond
#     produces {"role": ..., "content": ...} dicts, which the legacy tuple
#     format rejects.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Nano Vision Studio", theme=gr.themes.Soft(), css=css) as demo:

    gr.Markdown("# Nano 🍌 Vision Studio")
    gr.Markdown("### The Ultimate Interface: 4K Generation, Grounding, Multi-Image Composition & Iterative Chat")

    # Session-scoped state: the API key (seeded from the environment) and
    # the Gemini-native chat history (list of types.Content, or None).
    user_api_key_state = gr.State(os.environ.get("GOOGLE_API_KEY", ""))
    chat_state = gr.State(None)

    with gr.Tabs():

        # --- Tab 1: API key entry -----------------------------------------
        with gr.TabItem("🔑 API Settings"):
            gr.Markdown("### ⚙️ API Configuration")
            with gr.Row():
                with gr.Column(scale=3):
                    api_input = gr.Textbox(label="Google Gemini API Key", type="password", lines=1)
                with gr.Column(scale=1):
                    api_btn = gr.Button("Save & Initialize 💾", variant="primary")
            api_status = gr.Markdown()
            api_btn.click(update_api_key, inputs=[api_input], outputs=[api_status, user_api_key_state])

        # --- Tab 2: text-to-image studio ----------------------------------
        with gr.TabItem("🎨 Creation Studio"):
            with gr.Row():
                with gr.Column(scale=1):
                    t1_prompt = gr.Textbox(label="Prompt", lines=4, placeholder="Describe the scene...")
                    with gr.Group():
                        t1_model = gr.Dropdown(choices=list(MODELS.keys()), value="🧠 Gemini 3 Pro Preview (Recommended)", label="Model")
                        with gr.Row():
                            t1_ratio = gr.Dropdown(RATIOS, value="16:9", label="Aspect Ratio")
                            t1_res = gr.Dropdown(RESOLUTIONS, value="2K", label="Resolution (Pro only)")
                        t1_grounding = gr.Checkbox(label="Google Search (Grounding)")
                    t1_btn = gr.Button("Generate ✨", variant="primary", size="lg")

                with gr.Column(scale=2):
                    t1_gallery = gr.Gallery(label="Final Images", columns=2, height="auto")

                    t1_sources = gr.HTML(label="Grounding Sources")
                    t1_text = gr.Markdown(label="Generated Text")
                    with gr.Accordion("🧠 Thought Process", open=False):
                        t1_thought_imgs = gr.Gallery(label="Visual Drafts", columns=4, height=150)

                        t1_thought_txt = gr.Markdown(label="Thought Stream")

            t1_btn.click(
                generate_studio,
                inputs=[t1_prompt, t1_model, t1_ratio, t1_res, t1_grounding, user_api_key_state],
                outputs=[t1_gallery, t1_text, t1_thought_imgs, t1_thought_txt, t1_sources]
            )

        # --- Tab 3: multi-image composition -------------------------------
        with gr.TabItem("🛠️ Composition"):
            with gr.Row():
                with gr.Column(scale=1):
                    t2_files = gr.File(label="Reference Images (Max 14)", file_count="multiple", type="filepath")
                    t2_prompt = gr.Textbox(label="Instructions", lines=3)
                    with gr.Accordion("Advanced Settings", open=False):
                        t2_model = gr.Dropdown(list(MODELS.keys()), value="🧠 Gemini 3 Pro Preview (Recommended)", label="Model")
                        with gr.Row():
                            t2_ratio = gr.Dropdown(RATIOS, value="1:1", label="Aspect Ratio")
                            t2_res = gr.Dropdown(RESOLUTIONS, value="1K", label="Output Resolution")
                        t2_grounding = gr.Checkbox(label="Google Search (Grounding)")
                    t2_btn = gr.Button("Run", variant="primary")

                with gr.Column(scale=2):
                    t2_gallery = gr.Gallery(label="Result", columns=1)
                    t2_text = gr.Markdown()

            t2_btn.click(
                generate_composition,
                inputs=[t2_prompt, t2_files, t2_model, t2_ratio, t2_res, t2_grounding, user_api_key_state],
                outputs=[t2_gallery, t2_text]
            )

        # --- Tab 4: iterative chat ----------------------------------------
        with gr.TabItem("💬 Chat & Refinement"):
            with gr.Row():
                with gr.Column(scale=2):

                    # type="messages" matches the role/content dicts built
                    # by chat_respond.
                    chat_history = gr.Chatbot(label="Session History", height=600, type="messages")
                    with gr.Row():
                        chat_input = gr.Textbox(label="Your Message", scale=4)

                        chat_img = gr.File(label="Attach Images (Max 14)", file_count="multiple", type="filepath", height=100)
                    with gr.Row():
                        chat_btn = gr.Button("Send", variant="primary")
                        clear_btn = gr.Button("🗑️ New Session")
                    with gr.Accordion("Chat Options", open=False):
                        c_model = gr.Dropdown(list(MODELS.keys()), value="🧠 Gemini 3 Pro Preview (Recommended)", label="Model")
                        with gr.Row():
                            c_ratio = gr.Dropdown(RATIOS, value="16:9", label="Aspect Ratio")
                            c_res = gr.Dropdown(RESOLUTIONS, value="2K", label="Resolution (Pro only)")
                        c_grounding = gr.Checkbox(label="Grounding")

                with gr.Column(scale=1):
                    chat_gallery_zoom = gr.Gallery(label="Zoom", columns=1, height="auto")

            chat_btn.click(
                chat_respond,
                inputs=[chat_input, chat_history, chat_state, chat_img, c_model, c_grounding, c_ratio, c_res, user_api_key_state],
                outputs=[chat_input, chat_img, chat_history, chat_state, chat_gallery_zoom]
            )
            clear_btn.click(
                clear_chat,
                inputs=[],
                outputs=[chat_history, chat_state, chat_gallery_zoom]
            )

        # --- Tab 5: static user guide -------------------------------------
        with gr.TabItem("📚 Guide"):
            gr.Markdown("""
# Comprehensive Guide

Welcome to the ultimate interface for **Nano Banana Pro** (Gemini 3 Pro) and **Nano Banana** (Gemini 2.5 Flash).

## 🚀 Choose Your Model

| Feature | ⚡ Gemini 2.5 Flash (Nano Banana) | 🧠 Gemini 3 Pro (Nano Banana Pro) |
| :--- | :--- | :--- |
| **Best For** | Speed, High Volume, Prototyping | Professional Assets, Complex Logic, Text Rendering |
| **Resolution** | 1024x1024 (Native) | Up to **4K** (High Fidelity) |
| **Inputs** | Text + Images | Text + up to **14 Reference Images** |
| **Special** | Fast & Efficient | **Thinking Process**, **Search Grounding** |

---

## ✨ Advanced Capabilities Explained

### 1. 🧠 The "Thinking" Process (Pro Only)
Nano Banana Pro doesn't just draw; it *thinks*. Before generating pixels, it reasons through your prompt to understand composition, lighting, and logic.
* **In this App:** Check the **"Thought Process"** accordion in the *Creation Studio* to read the model's internal monologue and see draft visualizations (Thought Images).

### 2. 🌍 Search Grounding (Real-Time Data)
The model isn't stuck in the past. It can access **Google Search** to generate images based on live data.
* **Try this:** "Visualize the current weather forecast for Tokyo as a modern chart."
* **In this App:** Enable the **"Grounding"** checkbox. Sources will appear below the generated image.

### 3. 🖼️ Advanced Composition (up to 14 Images)
While Flash handles fewer inputs, Pro can mix up to **14 images**!
* **Use Case:** Style transfer, maintaining character consistency, or complex collages.
* **How:** Use the **"Composition"** tab to upload multiple reference files.

---

## 💡 Prompting Masterclass

To get the best results, follow these professional tips:

* **Be Hyper-Specific:** Don't just say "a cat". Say *"A photorealistic close-up of a Siamese cat, golden hour lighting, captured with an 85mm lens"*.
* **Provide Context:** Explain the intent. *"Create a logo for a high-end minimalist skincare brand"*.
* **Positive Framing:** Instead of "no cars", describe the scene as *"an empty, deserted street"*.
* **Iterate with Chat:** Don't expect perfection on turn #1. Use the **Chat & Refinement** tab to say *"Make the lighting warmer"* or *"Add a wizard hat"*.

## ⚡ Performance Tips
* **4K Generation:** Available only on Pro. It costs more but delivers stunning print-quality results.
* **Aspect Ratios:** We support everything from **1:1** to **21:9** (Cinematic).
""")
|
|
|
|
|
if __name__ == "__main__":
    # Start the hourly temp-file janitor as a daemon so it dies with the app.
    threading.Thread(target=cleanup_old_files, daemon=True).start()
    demo.queue(default_concurrency_limit=20)

    # Bug fix: `theme` and `css` are gr.Blocks() constructor options;
    # Blocks.launch() does not accept them and would raise TypeError at
    # startup. They were removed from this call — set them on gr.Blocks(...).
    demo.launch(
        max_threads=40,
        show_error=True,
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )