# =======================================================================================
#  ScienceBrain.AI - A Multi-Modal Gradio App
#  Author: Your Name Here (with a little help from a friendly AI)
#  Version: 2.0 - Now with 100% more witty comments!
# =======================================================================================

# ---------------------------------------------------------------------------------------
#  🐍 Step 1: Import the Ark of Libraries
#  "Any fool can write code that a computer can understand. Good programmers write code
#  that humans can understand." - Martin Fowler. So let's make this understandable.
# ---------------------------------------------------------------------------------------

# --- Standard Library Imports ---
import os
import base64
import glob
import re
from datetime import datetime

# --- Third-Party Imports ---
import gradio as gr
import openai
from openai import OpenAI
import pytz
import cv2
from dotenv import load_dotenv
from moviepy.editor import VideoFileClip  # Used by process_video to pull out the audio track.

# ---------------------------------------------------------------------------------------
#  βš™οΈ Step 2: Configuration & Global Domination... I mean, Initialization
#  "The best way to predict the future is to invent it." - Alan Kay
#  Here, we're inventing the configuration for our glorious app.
# ---------------------------------------------------------------------------------------

# Load environment variables from a .env file. Perfect for local testing.
# On Hugging Face Spaces, these should be set in the "Secrets" section.
load_dotenv()

# --- API Keys & Endpoints ---
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
OPENAI_ORG_ID = os.getenv('OPENAI_ORG_ID')
HF_KEY = os.getenv('HF_KEY')  # Your Hugging Face Token, the key to the OSS kingdom.
FIREWORKS_API_BASE = "https://api.fireworks.ai/inference/v1"
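
# A minimal .env sketch for local runs (placeholder values, not real credentials):
#
#   OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxx
#   OPENAI_ORG_ID=org-xxxxxxxxxxxxxxxx   # optional
#   HF_KEY=hf_xxxxxxxxxxxxxxxxxxxxxxxx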

# --- Model Configuration ---
DEFAULT_OSS_MODEL = "openai/gpt-oss-120b"
DEFAULT_OPENAI_MODEL = "gpt-4o-2024-05-13"
# Greetings from Mound, Minnesota! A little local flavor in the code.

# ---------------------------------------------------------------------------------------
#  πŸ› οΈ Step 3: Core Helper Functions - The Unsung Heroes
#  These functions do the behind-the-scenes work. They're like the roadies of a rock concert.
# ---------------------------------------------------------------------------------------

def get_llm_client(model_name: str) -> OpenAI:
    """
    🐍 Creates and returns an OpenAI client configured for the selected model.
    This function is the bouncer of our AI club. It checks your credentials (API keys)
    and directs you to the right VIP lounge (API endpoint).

    Args:
        model_name (str): The name of the model to use.

    Returns:
        OpenAI: A configured OpenAI client instance.

    Raises:
        gr.Error: If the required API key for the selected model is not found.
    """
    # If we're using the cool, open-source model...
    if model_name == DEFAULT_OSS_MODEL:
        if not HF_KEY:
            # "Houston, we have a problem." - Apollo 13
            raise gr.Error("Hugging Face API Key (HF_KEY) is missing! Add it to your Space secrets.")
        # Point the client to the Fireworks.ai proxy for the OSS model.
        return OpenAI(api_key=HF_KEY, base_url=FIREWORKS_API_BASE)
    # Otherwise, for the standard OpenAI models...
    else:
        if not OPENAI_API_KEY:
            # "I've got a bad feeling about this." - Han Solo
            raise gr.Error("OpenAI API Key is missing! Add it to your Space secrets.")
        # Use the standard OpenAI client configuration.
        return OpenAI(api_key=OPENAI_API_KEY, organization=OPENAI_ORG_ID)
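
# Illustrative routing (assumes the secrets above are set):
#   get_llm_client(DEFAULT_OSS_MODEL)  # -> client aimed at the Fireworks.ai endpoint
#   get_llm_client("gpt-4o-mini")      # -> client aimed at api.openai.com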

def generate_filename(prompt: str, file_type: str, original_name: str = None) -> str:
    """
    💾 Generates a safe, unique, and descriptive filename.
    Because 'output_1.txt' is for amateurs. We're creating masterpieces here,
    and they deserve a proper name.

    Args:
        prompt (str): The user's prompt, used to make the name descriptive.
        file_type (str): The file extension (e.g., "md", "png").
        original_name (str, optional): The original name of an uploaded file.

    Returns:
        str: A clean, timestamped filename.
    """
    # Get the current time in a sane timezone. No one likes UTC confusion.
    central = pytz.timezone('US/Central')
    safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
    
    # "I'm going to make him an offer he can't refuse." - The Godfather
    # We're making an offer to the filesystem it can't refuse by removing illegal characters.
    safe_prompt = re.sub(r'[<>:"/\\|?*\n]', ' ', prompt).strip()[:50]
    
    # If it's a response to an uploaded file, include the original name.
    if original_name:
        base_name = os.path.splitext(original_name)[0]
        file_stem = f"{safe_date_time}_{safe_prompt}_{base_name}"[:100]
    else:
        file_stem = f"{safe_date_time}_{safe_prompt}"[:100]
        
    return f"{file_stem}.{file_type}"

def create_and_save_file(content: str, prompt: str, should_save: bool, file_type: str = "md", original_name: str = None):
    """
    ✍️ Saves content to a file if the user has blessed us with permission.
    "With great power comes great responsibility." - Uncle Ben. The power to save
    is in the user's hands.

    Args:
        content (str): The generated content to save.
        prompt (str): The user prompt that generated the content.
        should_save (bool): A flag indicating whether to save the file.
        file_type (str, optional): The file extension. Defaults to "md".
        original_name (str, optional): The original name of an input file.
    """
    if not should_save:
        print("πŸ’Ύ Save checkbox is unchecked. Skipping file save.")
        return

    filename = generate_filename(prompt, file_type, original_name)
    try:
        with open(filename, "w", encoding="utf-8") as f:
            # We save both the prompt and the response for context. It's just polite.
            full_content = f"πŸ’‘ PROMPT:\n{prompt}\n\n{'='*20}\n\nπŸ€– RESPONSE:\n{content}"
            f.write(full_content)
        print(f"βœ… Successfully saved conversation to {filename}")
    except Exception as e:
        # It's not a bug, it's an undocumented feature.
        print(f"πŸ”₯ Error saving file {filename}: {e}")

# ---------------------------------------------------------------------------------------
#  🧠 Step 4: AI Processing Functions - Where the Magic Happens
#  "Any sufficiently advanced technology is indistinguishable from magic." - Arthur C. Clarke
# ---------------------------------------------------------------------------------------

def process_text(client: OpenAI, model_name: str, history: list, text_input: str, should_save: bool) -> list:
    """
    💬 Handles a text-only prompt. The bread and butter of chat apps.
    It's simple, elegant, and gets the job done. Like a little black dress.

    Args:
        client (OpenAI): The configured OpenAI client.
        model_name (str): The name of the AI model.
        history (list): The conversation history in OpenAI format.
        text_input (str): The user's text prompt.
        should_save (bool): Flag to determine if the output should be saved.

    Returns:
        list: The updated conversation history.
    """
    history.append({"role": "user", "content": text_input})
    
    completion = client.chat.completions.create(
        model=model_name,
        messages=history,
        stream=False  # We're keeping it simple for now. Streaming is a whole other party.
    )
    response = completion.choices[0].message.content
    
    history.append({"role": "assistant", "content": response})
    create_and_save_file(response, text_input, should_save)
    return history

def process_image(client: OpenAI, model_name: str, history: list, image_path: str, user_prompt: str, should_save: bool) -> list:
    """
    πŸ–ΌοΈ Processes an image with a text prompt. A picture is worth a thousand words,
    but with AI, it can be worth a thousand lines of code, a poem, or a recipe.

    Args:
        client (OpenAI): The configured OpenAI client.
        model_name (str): The name of the AI model.
        history (list): The conversation history.
        image_path (str): The local path to the uploaded image.
        user_prompt (str): The text prompt accompanying the image.
        should_save (bool): Flag to determine if the output should be saved.

    Returns:
        list: The updated conversation history.
    """
    # "I'll be back." - The Terminator. The image will be back, but as Base64.
    with open(image_path, "rb") as img_file:
        base64_image = base64.b64encode(img_file.read()).decode("utf-8")
    
    # Construct the special message format for multimodal input. Derive the MIME
    # subtype from the file extension so a PNG isn't mislabeled as a JPEG.
    ext = os.path.splitext(image_path)[1].lower().lstrip(".") or "jpeg"
    mime_subtype = "jpeg" if ext == "jpg" else ext
    image_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": user_prompt},
            {"type": "image_url", "image_url": {"url": f"data:image/{mime_subtype};base64,{base64_image}"}}
        ]
    }
    history.append(image_message)

    response = client.chat.completions.create(
        model=model_name,
        messages=history,
        temperature=0.0 # Let's be factual with images.
    )
    image_response = response.choices[0].message.content
    history.append({"role": "assistant", "content": image_response})
    
    original_name = os.path.basename(image_path)
    create_and_save_file(image_response, user_prompt, should_save, original_name=original_name)
    return history

def process_audio(client: OpenAI, model_name: str, history: list, audio_path: str, user_prompt: str, should_save: bool) -> list:
    """
    🎤 Transcribes audio using Whisper, then sends the transcript to the chat model.
    "Listen to them. The children of the night. What music they make!" - Dracula.
    We're listening, and turning that music into text.

    Args:
        client (OpenAI): The configured OpenAI client.
        model_name (str): The name of the AI model.
        history (list): The conversation history.
        audio_path (str): Path to the uploaded audio file.
        user_prompt (str): The text prompt to guide the response to the transcript.
        should_save (bool): Flag to determine if the output should be saved.

    Returns:
        list: The updated conversation history.
    """
    try:
        with open(audio_path, "rb") as audio_file:
            # Let Whisper do its thing. It's surprisingly good at it. (This assumes the
            # active endpoint exposes a Whisper-compatible /audio/transcriptions route.)
            transcription = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file
            ).text
        
        # Combine the user's prompt with the new transcript for full context.
        full_prompt = f"{user_prompt}\n\n--- Audio Transcription ---\n{transcription}"
        history.append({"role": "user", "content": full_prompt})

        # Now, get a response from the main chat model.
        completion = client.chat.completions.create(model=model_name, messages=history)
        response = completion.choices[0].message.content
        history.append({"role": "assistant", "content": response})
        
        create_and_save_file(response, full_prompt, should_save, original_name=os.path.basename(audio_path))
        
    except openai.BadRequestError as e:
        raise gr.Error(f"Audio processing error: {e}")
    except Exception as e:
        raise gr.Error(f"An unexpected error occurred during audio processing: {e}")
        
    return history

def process_video(client: OpenAI, model_name: str, history: list, video_path: str, user_prompt: str, should_save: bool) -> list:
    """
    🎬 Processes a video by extracting frames and audio for a comprehensive summary.
    "Life moves pretty fast. If you don't stop and look around once in a while,
    you could miss it." - Ferris Bueller. We're stopping and looking, frame by frame.

    Args:
        client (OpenAI): The configured OpenAI client.
        model_name (str): The name of the AI model.
        history (list): The conversation history.
        video_path (str): Path to the uploaded video file.
        user_prompt (str): The text prompt for the video summary.
        should_save (bool): Flag to determine if the output should be saved.

    Returns:
        list: The updated conversation history.
    """
    try:
        # --- Frame Extraction ---
        # "I'm ready for my close-up, Mr. DeMille." - Sunset Boulevard
        base64Frames = []
        video = cv2.VideoCapture(video_path)
        fps = video.get(cv2.CAP_PROP_FPS)
        if not fps > 0: raise gr.Error("Could not read video file. Is it valid?")
        
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        frames_to_skip = max(1, int(fps * 2))  # One frame every ~2 seconds; never a zero step.
        for i in range(0, total_frames, frames_to_skip):
            video.set(cv2.CAP_PROP_POS_FRAMES, i)
            success, frame = video.read()
            if not success: break
            _, buffer = cv2.imencode(".jpg", frame)
            base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
        video.release()
        if not base64Frames: raise gr.Error("Could not extract any frames from the video.")

        # --- Construct the Message Payload ---
        # Start with the frames.
        messages = [
            {"type": "text", "text": "These are frames from a video. Please analyze them."},
            *map(lambda x: {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{x}", "detail": "low"}}, base64Frames),
        ]

        # --- Audio Extraction & Transcription ---
        audio_path = None
        try:
            with VideoFileClip(video_path) as clip:
                if clip.audio:
                    audio_path = "temp_video_audio.mp3"
                    clip.audio.write_audiofile(audio_path, bitrate="32k", logger=None)
            
            if audio_path:
                with open(audio_path, "rb") as audio_file:
                    transcript = client.audio.transcriptions.create(model="whisper-1", file=audio_file).text
                messages.append({"type": "text", "text": f"--- Video Transcription ---\n{transcript}"})
                os.remove(audio_path) # "Leave the gun. Take the cannoli." - Clean up after ourselves.
        except Exception as e:
            print(f"⚠️ Audio extraction/transcription failed or skipped: {e}")
        
        # Finally, add the user's guiding prompt.
        messages.append({"type": "text", "text": user_prompt})
        history.append({"role": "user", "content": messages})
        
        # --- Get the Final AI Response ---
        response = client.chat.completions.create(model=model_name, messages=history)
        result = response.choices[0].message.content
        history.append({"role": "assistant", "content": result})
        
        create_and_save_file(result, user_prompt, should_save, original_name=os.path.basename(video_path))
        return history

    except Exception as e:
        raise gr.Error(f"Video processing failed spectacularly: {str(e)}")


# ---------------------------------------------------------------------------------------
#  πŸ–ΌοΈ Step 5: Gradio UI & Event Handlers - The Face of the Operation
#  "The noblest art is that of making others happy." - P.T. Barnum.
#  Our UI's goal is to make the user happy. Or at least not frustrated.
# ---------------------------------------------------------------------------------------

def convert_history_to_openai_format(gradio_history: list) -> list:
    """
    🔄 Converts Gradio's chat history format to the OpenAI API format.
    It's like translating from English to Klingon, but for dictionaries.

    Args:
        gradio_history (list): History from a gr.Chatbot component.

    Returns:
        list: History formatted for the OpenAI API.
    """
    openai_history = []
    for user_msg, bot_msg in gradio_history:
        if user_msg:
            # Handle complex multimodal user messages for history
            if isinstance(user_msg, tuple):
                text, file_path = user_msg
                # This part needs to be more robust if we want to "replay" multimodal history
                openai_history.append({"role": "user", "content": text})
            else:
                openai_history.append({"role": "user", "content": user_msg})
        if bot_msg:
            openai_history.append({"role": "assistant", "content": bot_msg})
    return openai_history
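
# Example translation (illustrative):
#   [["Hi", "Hello!"]]  ->  [{"role": "user", "content": "Hi"},
#                            {"role": "assistant", "content": "Hello!"}]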

def get_file_processor(file_path: str):
    """
    πŸ” Determines which processing function to use based on file extension.
    A simple but elegant router. The Grand Central Station of file handling.

    Args:
        file_path (str): The path to the file.

    Returns:
        function: The appropriate processing function or None.
    """
    ext = os.path.splitext(file_path)[1].lower()
    if ext in ['.png', '.jpg', '.jpeg', '.gif', '.webp']: return process_image
    if ext in ['.mp3', '.wav', '.m4a', '.flac', '.ogg']: return process_audio
    if ext in ['.mp4', '.mov', '.avi']: return process_video
    return None
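
# Illustrative routing:
#   get_file_processor("diagram.png")  ->  process_image
#   get_file_processor("notes.txt")    ->  None (so the caller raises an "unsupported" error)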

def handle_multimodal_submit(message: dict, history: list, model_name: str, should_save: bool):
    """
    🚀 The main event handler for the chat interface.
    This function is the quarterback. It takes the snap (user input), reads the
    defense (checks for files), and makes the play (calls the right processor).

    Args:
        message (dict): The output from the gr.MultimodalTextbox.
        history (list): The current chat history in Gradio format.
        model_name (str): The selected AI model.
        should_save (bool): The state of the save checkbox.

    Yields:
        Updates to the Gradio UI components.
    """
    text_prompt = message["text"]
    files = message["files"]
    
    # --- Optimistic UI Update ---
    # Show the user's message in the chat immediately. It feels more responsive.
    # "Carpe diem. Seize the day, boys. Make your lives extraordinary." - Dead Poets Society
    user_turn_content = text_prompt
    if files:
        # In recent Gradio versions, message["files"] arrives as a list of local path strings.
        file_names = ", ".join(os.path.basename(f) for f in files)
        user_turn_content += f"\n\n*📎 Attached: {file_names}*"
    
    history.append([user_turn_content, None])
    yield history, gr.MultimodalTextbox(value=None, interactive=False) # Disable input while processing

    try:
        # Get the right AI client for the job.
        client = get_llm_client(model_name)
        
        # Convert history to the format our AI overlords demand.
        openai_history = convert_history_to_openai_format(history[:-1]) # Exclude the current turn
        
        # --- Route to the Correct Processor ---
        if not files:
            # It's just text. Easy peasy.
            updated_openai_history = process_text(client, model_name, openai_history, text_prompt, should_save)
        else:
            # We have files! To the file-type-switch-case-mobile!
            file_path = files[0]  # Process the first file; entries are plain path strings.
            processor = get_file_processor(file_path)
            if processor:
                updated_openai_history = processor(client, model_name, openai_history, file_path, text_prompt, should_save)
            else:
                raise gr.Error(f"Unsupported file type: {os.path.splitext(file_path)[1]}")
        
        # Update the last message in the Gradio history with the AI's response.
        history[-1][1] = updated_openai_history[-1]['content']
        yield history, gr.MultimodalTextbox(value=None, interactive=True)

    except Exception as e:
        # "Well, nobody's perfect." - Some Like It Hot
        # If something went wrong, let the user know and re-enable the input.
        history[-1][1] = f"**πŸ”₯ An Error Occurred:** {str(e)}"
        yield history, gr.MultimodalTextbox(value=message, interactive=True)

def update_file_list_display(file_types: list):
    """
    🔄 Refreshes the list of generated files in the sidebar.
    It's like hitting F5, but with more Python.
    """
    if not file_types: return gr.update(choices=[], value=[])
    
    # Keep only files whose stem is long enough to carry the "MMDD_HHMM_" prefix.
    all_files = [f for f in glob.glob("*.*") if os.path.splitext(f)[1].lower() in file_types and len(os.path.splitext(f)[0]) >= 10]
    all_files.sort(key=lambda x: os.path.getmtime(x), reverse=True)
    return gr.update(choices=all_files, value=[])

def delete_selected_files(files_to_delete: list, current_filter: list):
    """
    πŸ—‘οΈ Deletes the files selected by the user. "Execute Order 66."
    """
    if not files_to_delete:
        gr.Warning("No files selected to delete. Are you just testing the button?")
        return update_file_list_display(current_filter)

    for file_path in files_to_delete:
        try:
            os.remove(file_path)
        except OSError as e:
            gr.Warning(f"Could not delete {file_path}. It's probably hiding. Error: {e}")
    
    gr.Info(f"Deleted {len(files_to_delete)} files. They're gone. Reduced to atoms.")
    return update_file_list_display(current_filter)

# ---------------------------------------------------------------------------------------
#  🚀 Step 6: Main Application Entry Point - "Engage!"
#  This is where we build the UI and launch the app into the digital cosmos.
# ---------------------------------------------------------------------------------------

with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal", secondary_hue="orange"), title="ScienceBrain.AI") as demo:
    gr.Markdown("# �🧠 ScienceBrain.AI\n*A Multi-Modal Interface for Advanced AI Models*")

    with gr.Row():
        with gr.Column(scale=1, min_width=300):
            # --- Sidebar Controls ---
            gr.Markdown("### βš™οΈ Controls")
            model_selector = gr.Dropdown(
                label="Select Model",
                choices=[DEFAULT_OSS_MODEL, DEFAULT_OPENAI_MODEL, "gpt-4o-mini", "gpt-3.5-turbo"],
                value=DEFAULT_OSS_MODEL,
            )
            save_checkbox = gr.Checkbox(label="💾 Save Session Output", value=True)
            clear_btn = gr.Button("🗑️ Clear Session", variant="stop")

            with gr.Accordion("πŸ“‚ File Management", open=False):
                file_filter = gr.CheckboxGroup(
                    label="Filter by Type",
                    choices=[".md", ".png", ".jpg", ".pdf", ".wav", ".mp3", ".mp4"],
                    value=[".md", ".png"]
                )
                file_list = gr.CheckboxGroup(label="Generated Files (Select to Delete)", choices=[], value=[])
                with gr.Row():
                    refresh_files_btn = gr.Button("πŸ”„ Refresh")
                    delete_files_btn = gr.Button("πŸ—‘οΈ Delete", variant="primary")

        with gr.Column(scale=4):
            # --- Main Chat Interface ---
            chatbot = gr.Chatbot(
                label="Conversation",
                bubble_full_width=False,
                height=650,
                avatar_images=(None, "https://openmoji.org/data/color/svg/1F916.svg") # User, Robot
            )
            multimodal_input = gr.MultimodalTextbox(
                file_types=["image", "audio", "video"],
                placeholder="Type a message or upload a file...",
                label="Your Input"
            )

    # --- Event Listener Wiring ---
    # "There is no spoon." - The Matrix. There are only functions and triggers.
    
    # Main submit action
    multimodal_input.submit(
        fn=handle_multimodal_submit,
        inputs=[multimodal_input, chatbot, model_selector, save_checkbox],
        outputs=[chatbot, multimodal_input]
    )

    # Clear chat action
    clear_btn.click(fn=lambda: ([], None), inputs=None, outputs=[chatbot, multimodal_input])

    # File management actions
    refresh_files_btn.click(fn=update_file_list_display, inputs=[file_filter], outputs=[file_list])
    file_filter.change(fn=update_file_list_display, inputs=[file_filter], outputs=[file_list])
    delete_files_btn.click(fn=delete_selected_files, inputs=[file_list, file_filter], outputs=[file_list])
    
    # Load initial file list when the app starts.
    demo.load(fn=update_file_list_display, inputs=[file_filter], outputs=[file_list])


# "So, this is how liberty dies... with thunderous applause." - PadmΓ© Amidala
# Or, in our case, how an app starts... with a simple launch command.
if __name__ == "__main__":
    demo.launch(debug=True)
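    # For quick local testing you could also pass share=True to get a temporary public link.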