hasanbasbunar committed
Commit e2f7b5c · verified · 1 parent: 620aef1

Update app.py

Files changed (1): app.py (+56 -34)
app.py CHANGED
@@ -45,16 +45,18 @@ def safe_process_image(part):
     return None
 
 def process_response(response):
-    """Separates the final result from the thought process (Thinking Mode)."""
+    """Separates the final result, the thought process (Thinking Mode), and the sources."""
     final_imgs, final_txt = [], ""
     thought_imgs, thought_txt = [], ""
+    sources_html = None  # <--- ADDED: container for the sources
 
     if not response or not response.parts:
-        return final_imgs, final_txt, thought_imgs, thought_txt
+        return final_imgs, final_txt, thought_imgs, thought_txt, sources_html
 
     print(f"\n--- RECEIVED ({len(response.parts)} parts) ---")
+
+    # 1. Parse the content (images, text & thoughts)
     for i, part in enumerate(response.parts):
-        # The docs mention the 'thought' attribute for the Gemini 3 Pro Thinking Mode
         is_thought = getattr(part, 'thought', False)
 
         if is_thought:
@@ -68,7 +70,14 @@ def process_response(response):
             img = safe_process_image(part)
             if img: final_imgs.append(img)
 
-    return final_imgs, final_txt, thought_imgs, thought_txt
+    # 2. Parse the grounding (sources) - ADDED
+    # The metadata lives on the 'candidate', not on the 'parts'
+    if response.candidates and response.candidates[0].grounding_metadata:
+        gm = response.candidates[0].grounding_metadata
+        if gm.search_entry_point and gm.search_entry_point.rendered_content:
+            sources_html = gm.search_entry_point.rendered_content
+
+    return final_imgs, final_txt, thought_imgs, thought_txt, sources_html
 
 # --- CLEANUP WORKER ---
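The grounding lookup above walks response.candidates[0].grounding_metadata.search_entry_point.rendered_content. The same walk can also be written as a standalone, defensive helper for the case where any level of that chain is missing; a minimal sketch (the helper name is illustrative and not part of app.py):

def extract_grounding_html(response):
    """Return the rendered Google Search entry point for a response, or None."""
    candidates = getattr(response, "candidates", None) or []
    if not candidates:
        return None
    gm = getattr(candidates[0], "grounding_metadata", None)
    entry = getattr(gm, "search_entry_point", None) if gm else None
    return getattr(entry, "rendered_content", None) if entry else None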
 
@@ -110,6 +119,8 @@ def generate_studio(prompt, model_ui, ratio, resolution, grounding, user_api_key
 
     if "gemini-3" in model_name:
         img_conf["image_size"] = resolution
+        # <--- ADDED: enable Thinking Mode
+        gen_conf["thinking_config"] = types.ThinkingConfig(include_thoughts=True)
     if grounding:
         gen_conf["tools"] = [{"google_search": {}}]
 
@@ -122,6 +133,7 @@ def generate_studio(prompt, model_ui, ratio, resolution, grounding, user_api_key
             contents=[prompt],
             config=types.GenerateContentConfig(**gen_conf)
         )
+        # Now returns 5 elements (including sources)
         return process_response(response)
     except Exception as e:
         raise gr.Error(f"API Error: {str(e)}")
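With thinking_config in place, the request generate_studio builds for a Gemini 3 model amounts roughly to the following. This is a condensed sketch rather than the literal app.py code: the client construction, model id, prompt, aspect ratio and resolution values are placeholders, and response_modalities is assumed to be set the same way as in the chat path.

from google import genai
from google.genai import types

client = genai.Client(api_key="YOUR_API_KEY")  # placeholder

config = types.GenerateContentConfig(
    response_modalities=["TEXT", "IMAGE"],                        # assumed, as in the chat path
    image_config=types.ImageConfig(aspect_ratio="16:9",           # placeholder values;
                                   image_size="2K"),              # image_size only for gemini-3 models
    thinking_config=types.ThinkingConfig(include_thoughts=True),  # new in this commit
    tools=[{"google_search": {}}],                                # only when grounding is checked
)

response = client.models.generate_content(
    model="gemini-3-pro-image-preview",   # placeholder model id
    contents=["A watercolor lighthouse at dawn"],
    config=config,
)

process_response(response) then yields the five values (final images, final text, thought images, thought text, sources HTML) that the Tab 1 outputs consume.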
@@ -136,7 +148,6 @@ def generate_composition(prompt, files, model_ui, ratio, resolution, user_api_ke
     contents = [prompt]
     for p in files:
         try:
-            # The Python docs show that PIL Image objects can be passed directly
             contents.append(Image.open(p))
         except: pass
 
@@ -145,6 +156,8 @@ def generate_composition(prompt, files, model_ui, ratio, resolution, user_api_ke
 
     if "gemini-3" in model_name:
         img_conf["image_size"] = resolution
+        # <--- ADDED: enable Thinking Mode here as well, to be safe
+        gen_conf["thinking_config"] = types.ThinkingConfig(include_thoughts=True)
 
     gen_conf["image_config"] = types.ImageConfig(**img_conf)
 
@@ -155,14 +168,19 @@ def generate_composition(prompt, files, model_ui, ratio, resolution, user_api_ke
             contents=contents,
             config=types.GenerateContentConfig(**gen_conf)
         )
-        f_imgs, f_txt, t_imgs, t_txt = process_response(response)
+        # Handle the 5 return values
+        f_imgs, f_txt, t_imgs, t_txt, sources = process_response(response)
+
         full_text = f_txt
-        if t_txt: full_text += f"\n\n--- 🧠 MODEL REASONING ---\n{t_txt}"
+        # Append the sources and thoughts to the main text for this tab
+        if sources: full_text += f"\n\n{sources}"
+        if t_txt: full_text += f"\n\n{t_txt}"
+
         return f_imgs, full_text
     except Exception as e:
         raise gr.Error(f"Error: {str(e)}")
 
-# --- CHAT LOGIC (STRICT DOC COMPLIANCE) ---
+# --- CHAT LOGIC ---
 
 def chat_respond(message, history, chat_history_data, img_input, model_ui, grounding, user_api_key):
     """'Stateless' chat handling, conforming to the Google GenAI types"""
@@ -173,30 +191,30 @@ def chat_respond(message, history, chat_history_data, img_input, model_ui, groun
     model_name = MODELS[model_ui]
 
     tools = None
-    if grounding and "gemini-3" in model_name:
-        tools = [{"google_search": {}}]
+    thinking_conf = None  # <--- ADDED variable
+
+    if "gemini-3" in model_name:
+        # <--- ADDED: Thinking config
+        thinking_conf = types.ThinkingConfig(include_thoughts=True)
+        if grounding:
+            tools = [{"google_search": {}}]
 
-    # 1. Restore the history (respects the Thought Signatures)
-    # We pass the raw list of previously stored 'Content' objects
+    # 1. Restore the history
     chat = cli.chats.create(
         model=model_name,
         config=types.GenerateContentConfig(
             response_modalities=['TEXT', 'IMAGE'],
-            tools=tools
+            tools=tools,
+            thinking_config=thinking_conf  # <--- ADDED
         ),
         history=chat_history_data
     )
 
-    # 2. Prepare the user content (User Content)
-    # Note: for the manual history we use types.Part explicitly
-    # to guarantee correct serialization if needed.
-
-    # Content for the immediate send (.send_message accepts a PIL Image or a string)
+    # 2. Prepare the user content
    send_contents = [message]
    if img_input:
        send_contents.append(Image.open(img_input))
 
-    # Content for the UI display
    user_display_text = message
    if img_input:
        user_display_text += "\n\n🖼️ *(Image attached)*"
@@ -205,7 +223,8 @@ def chat_respond(message, history, chat_history_data, img_input, model_ui, groun
     try:
         # 3. Send to the model
         response = chat.send_message(send_contents)
-        f_imgs, f_txt, t_imgs, t_txt = process_response(response)
+        # Retrieve the 5 values
+        f_imgs, f_txt, t_imgs, t_txt, sources = process_response(response)
 
         # 4. Build the UI response
         bot_messages = []
@@ -219,7 +238,11 @@ def chat_respond(message, history, chat_history_data, img_input, model_ui, groun
 
         if f_txt:
             bot_messages.append({"role": "assistant", "content": f_txt})
-
+
+        # <--- ADDED: display the sources in the chat
+        if sources:
+            bot_messages.append({"role": "assistant", "content": sources})
+
         if f_imgs:
             for i, img in enumerate(f_imgs):
                 unique_filename = f"chat_{uuid.uuid4()}_{i}.png"
@@ -228,14 +251,10 @@ def chat_respond(message, history, chat_history_data, img_input, model_ui, groun
                 img_msg = {"path": file_path, "alt_text": "Generated Image"}
                 bot_messages.append({"role": "assistant", "content": img_msg})
 
-        if not f_txt and not f_imgs and not t_txt:
+        if not f_txt and not f_imgs and not t_txt and not sources:
             bot_messages.append({"role": "assistant", "content": "⚠️ *Empty response.*"})
 
-        # 5. Update the Gemini history (CRITICAL PER THE DOCS)
-        # We build the Content objects manually for the State
-        # This mimics what the SDK would do internally if the Chat object persisted.
-
-        # A. Create the user Content
+        # 5. Update the Gemini history
         u_parts = [types.Part.from_text(text=message)]
         if img_input:
             with open(img_input, "rb") as f:
@@ -243,14 +262,11 @@ def chat_respond(message, history, chat_history_data, img_input, model_ui, groun
                 u_parts.append(types.Part.from_bytes(data=img_bytes, mime_type="image/jpeg"))
         user_content_obj = types.Content(role="user", parts=u_parts)
 
-        # B. Retrieve the model Content (contains the hidden thought signatures)
         model_content_obj = response.candidates[0].content
 
-        # C. Merge
         current_data = chat_history_data if chat_history_data else []
         new_gemini_history = current_data + [user_content_obj, model_content_obj]
 
-        # UI history
        new_ui_history = history + [user_message_obj] + bot_messages
 
        return "", new_ui_history, new_gemini_history, f_imgs
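The chat tab stays stateless between Gradio callbacks: each turn rebuilds the SDK chat from the list of types.Content objects held in chat_state, sends the new message, and appends both the user turn and the model's candidate content back to that list. Stripped of image handling and UI code, the round trip looks roughly like this (function and variable names are illustrative, not part of app.py):

from google.genai import types

def next_turn(cli, model_name, stored_history, user_text):
    # Rebuild the chat from the previously stored Content objects.
    chat = cli.chats.create(
        model=model_name,
        config=types.GenerateContentConfig(response_modalities=["TEXT", "IMAGE"]),
        history=stored_history or [],
    )
    response = chat.send_message([user_text])

    # Store both sides of the exchange for the next turn.
    user_content = types.Content(role="user", parts=[types.Part.from_text(text=user_text)])
    model_content = response.candidates[0].content
    return (stored_history or []) + [user_content, model_content], response

In app.py the returned list travels through gr.State (chat_state), so no Chat object needs to outlive a single callback.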
@@ -271,13 +287,14 @@ h1 { text-align: center; color: #4F46E5; font-size: 2.5em; }
 .image-container img { max-height: 400px; width: auto; }
 """
 
+# <--- FIX: 'css' and 'theme' removed here
 with gr.Blocks(title="Nano Vision Studio") as demo:
 
     gr.Markdown("# Nano 🍌 Vision Studio")
     gr.Markdown("### The Ultimate Interface: 4K Generation, Grounding, Multi-Image Composition & Iterative Chat")
 
     user_api_key_state = gr.State(os.environ.get("GOOGLE_API_KEY", ""))
-    chat_state = gr.State(None)  # Stores the list of types.Content
+    chat_state = gr.State(None)
 
     with gr.Tabs():
         # --- TAB 0 : API ---
@@ -306,15 +323,18 @@ with gr.Blocks(title="Nano Vision Studio") as demo:
 
                 with gr.Column(scale=2):
                     t1_gallery = gr.Gallery(label="Final Images", columns=2, height="auto")
+                    # <--- ADDED: component to display the HTML sources
+                    t1_sources = gr.HTML(label="Grounding Sources")
                     t1_text = gr.Markdown(label="Generated Text")
                     with gr.Accordion("🧠 Thought Process", open=False):
                         t1_thought_imgs = gr.Gallery(label="Visual Drafts", columns=4, height=150)
-                        t1_thought_txt = gr.Textbox(label="Thought Stream", interactive=False, lines=4)
+                        # t1_thought_txt = gr.Textbox(label="Thought Stream", interactive=False, lines=4)
+                        t1_thought_txt = gr.Markdown(label="Thought Stream")
 
             t1_btn.click(
                 generate_studio,
                 inputs=[t1_prompt, t1_model, t1_ratio, t1_res, t1_grounding, user_api_key_state],
-                outputs=[t1_gallery, t1_text, t1_thought_imgs, t1_thought_txt]
+                outputs=[t1_gallery, t1_text, t1_thought_imgs, t1_thought_txt, t1_sources]  # <--- Added t1_sources to the outputs
             )
 
             # --- TAB 2 : COMPOSITION ---
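The click wiring relies on positional matching: generate_studio returns five values and the outputs list names five components in the same order, with the new gr.HTML slot receiving the rendered search entry point. A self-contained sketch of that pattern, with a stub handler standing in for generate_studio:

import gradio as gr

def stub_generate(prompt):
    # Stands in for generate_studio: images, text, thought images, thought text, sources HTML.
    return [], f"Echo: {prompt}", [], "", "<div>No sources.</div>"

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    btn = gr.Button("Generate")
    gallery = gr.Gallery(label="Final Images", columns=2)
    sources = gr.HTML(label="Grounding Sources")
    text = gr.Markdown(label="Generated Text")
    thought_imgs = gr.Gallery(label="Visual Drafts", columns=4)
    thought_txt = gr.Markdown(label="Thought Stream")

    # Output order must match the handler's return order.
    btn.click(stub_generate,
              inputs=[prompt],
              outputs=[gallery, text, thought_imgs, thought_txt, sources])

if __name__ == "__main__":
    demo.launch()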
@@ -344,7 +364,7 @@ with gr.Blocks(title="Nano Vision Studio") as demo:
         with gr.TabItem("💬 Chat & Refinement"):
             with gr.Row():
                 with gr.Column(scale=2):
-                    # type="messages" is required for Gradio 6
+                    # <--- FIX: 'type="messages"' removed here
                     chat_history = gr.Chatbot(label="Session History", height=600)
                     with gr.Row():
                         chat_input = gr.Textbox(label="Your Message", scale=4)
@@ -386,6 +406,8 @@ with gr.Blocks(title="Nano Vision Studio") as demo:
 if __name__ == "__main__":
     threading.Thread(target=cleanup_old_files, daemon=True).start()
     demo.queue(default_concurrency_limit=20)
+
+    # <--- FIX: 'css' and 'theme' added here
     demo.launch(
         theme=gr.themes.Soft(),
         css=css,