import os
import re
import json
import time
import random
import requests
import numpy as np
from PIL import Image
from io import BytesIO
import tempfile
import base64
import uuid
import subprocess

# Check if running in Colab - no needed in gradio
#try:
#    import google.colab
#    IN_COLAB = True
#except:
#    IN_COLAB = False

#if IN_COLAB:
#    from google.colab import drive
#    drive.mount('/content/drive')

# --- Package Installation ---
def install_packages():
    """Installs required packages if they are not already installed."""
    packages = [
        "transformers", "moviepy", "pytube", "pydub", "yt-dlp",
        "gradio", "huggingface_hub", "librosa", "soundfile", "openai-whisper", "imageio-ffmpeg"
    ]
    for package in packages:
        try:
            __import__(package)
            print(f"✅ {package} is already installed.")
        except ImportError:
            print(f"⏳ Installing {package}...")
            try:
                subprocess.check_call(["pip", "install", "-q", package])
                print(f"✅ {package} installed successfully.")
            except subprocess.CalledProcessError as e:
                print(f"❌ Error installing {package}: {e}")


install_packages()
import moviepy
from huggingface_hub import InferenceClient
import gradio as gr  # Import gradio
import librosa #for audio
import soundfile #for audio
import torch # Import PyTorch

class AIVideoGenerator:
    def __init__(self):
        """Initialize the AI Video Generator system."""
        self.script = None
        self.scenes = []
        self.audio_clips = []
        self.video_clips = []
        self.final_video = None
        self.temp_dir = tempfile.mkdtemp()
        self.setup_directories()
        self.hf_client = InferenceClient()
        
        print("🎬 AI Video Generator initialized!")
        print("✅ Temporary directories created")
        print("✅ Hugging Face client initialized")
    
    def setup_directories(self):
        """Set up the necessary directories for the project."""
        os.makedirs(os.path.join(self.temp_dir, "images"), exist_ok=True)
        os.makedirs(os.path.join(self.temp_dir, "videos"), exist_ok=True)
        os.makedirs(os.path.join(self.temp_dir, "audio"), exist_ok=True)
        os.makedirs(os.path.join(self.temp_dir, "output"), exist_ok=True)
        
        print(f"📁 Working directory: {self.temp_dir}")
    
    def generate_script(self, user_prompt, verbose=True):
        """
        Generate a structured script using the Hugging Face Inference API
        with qwen-2.5-7B-instruct model.
        
        Args:
            user_prompt (str): The user's input describing the video they want to create
            verbose (bool): Whether to print the generated script
        
        Returns:
            dict: A structured JSON storyboard
        """
        if verbose:
            print("🤖 Generating script using qwen-2.5-7B-instruct...")
        
        # Prepare the prompt for the model
        system_prompt = """You are an expert screenplay writer and video producer. 
        Create a detailed JSON storyboard for a video based on the user's input.
        The storyboard should be formatted as a valid JSON with the following structure:
        {
            "title": "Title of the video",
            "description": "Brief description of the overall video",
            "duration": "Estimated duration in seconds",
            "scenes": [
                {
                    "scene_id": 1,
                    "title": "Scene title",
                    "duration": "Duration in seconds",
                    "narration": "Text to be spoken in this scene",
                    "tone": "Emotional tone for the narration (cheerful, serious, etc.)",
                    "visuals": "Description of what should be shown visually",
                    "keywords": ["keyword1", "keyword2", "keyword3"],
                    "transition": "Type of transition to next scene"
                },
                ...more scenes...
            ]
        }
        
        Make sure:
        1. Each scene is 5-15 seconds long
        2. The narration matches the visuals
        3. Keywords are specific and searchable
        4. Transitions are varied (cut, fade, dissolve, etc.)
        5. The entire video tells a cohesive story
        The output MUST be a valid JSON only with no additional text."""
        
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"Create a storyboard for a video about: {user_prompt}"}
        ]
        
        try:
            # Call the Hugging Face Inference API
            response = self.hf_client.chat_completion(
                model="Qwen/Qwen2.5-7B-Instruct",
                messages=messages,
                temperature=0.7,
                max_tokens=2000
            )
            
            script_text = response.choices[0].message.content
            
            # Extract JSON from the response (in case there's surrounding text)
            json_match = re.search(r'```json(.*?)```', script_text, re.DOTALL)
            if json_match:
                script_text = json_match.group(1).strip()
            else:
                # Try to find JSON without code blocks
                json_match = re.search(r'({.*})', script_text, re.DOTALL)
                if json_match:
                    script_text = json_match.group(1).strip()
            
            # Parse the JSON
            self.script = json.loads(script_text)
            
            if verbose:
                print(f"✅ Script generated with {len(self.script['scenes'])} scenes")
                print(f"🎬 Title: {self.script['title']}")
                print(f"⏱️ Estimated duration: {self.script['duration']}")
                
                if verbose > 1:
                    print("\n📝 Script overview:")
                    for i, scene in enumerate(self.script['scenes']):
                        print(f"\nScene {i+1}: {scene['title']} ({scene['duration']}s)")
                        print(f"Narration: {scene['narration'][:100]}...")
                        print(f"Keywords: {', '.join(scene['keywords'])}")
            
            return self.script
            
        except Exception as e:
            print(f"❌ Error generating script: {e}")
            # Fallback to a simple script structure
            self.script = {
                "title": f"Video about {user_prompt}",
                "description": f"A video exploring {user_prompt}",
                "duration": "60",
                "scenes": [
                    {
                        "scene_id": 1,
                        "title": "Introduction",
                        "duration": "10",
                        "narration": f"Let's explore {user_prompt} together.",
                        "tone": "neutral",
                        "visuals": f"Imagery related to {user_prompt}",
                        "keywords": [user_prompt, "introduction", "overview"],
                        "transition": "fade"
                    },
                    {
                        "scene_id": 2,
                        "title": "Main Content",
                        "duration": "40",
                        "narration": f"Here are the key points about {user_prompt}.",
                        "tone": "informative",
                        "visuals": f"Detailed imagery of {user_prompt}",
                        "keywords": [user_prompt, "details", "explanation"],
                        "transition": "cut"
                    },
                    {
                        "scene_id": 3,
                        "title": "Conclusion",
                        "duration": "10",
                        "narration": f"That's a brief overview of {user_prompt}.",
                        "tone": "conclusive",
                        "visuals": f"Summary imagery of {user_prompt}",
                        "keywords": [user_prompt, "conclusion", "summary"],
                        "transition": "fade"
                    }
                ]
            }
            print("⚠️ Used fallback script generation")
            return self.script

    def fetch_images(self, keywords, num_images=3, verbose=True):
        """
        Fetch images based on keywords using Unsplash API or Pixabay API.
        
        Args:
            keywords (list): List of keywords to search for
            num_images (int): Number of images to fetch
            verbose (bool): Whether to print progress
            
        Returns:
            list: List of image paths saved locally
        """
        if verbose:
            print(f"🖼️ Fetching images for keywords: {', '.join(keywords)}")
        
        # Join keywords for the search query
        query = " ".join(keywords)
        image_paths = []
        
        # Try different free image APIs
        image_sources = [
            # Unsplash Source (no API key needed for basic usage)
            lambda q, n: [f"https://source.unsplash.com/1600x900/?{q}&sig={i}" for i in range(n)],
            
            # Pixabay API with fallback to no-API approach
            lambda q, n: [f"https://pixabay.com/api/?key=demo&q={q}&image_type=photo&per_page={n}"]
        ]
        
        successful = False
        
        for source_func in image_sources:
            if successful:
                break
                
            try:
                urls = source_func(query, num_images)
                
                for i, url in enumerate(urls):
                    try:
                        response = requests.get(url, timeout=10)
                        
                        # For direct image URLs (Unsplash)
                        if response.headers.get('content-type', '').startswith('image/'):
                            img = Image.open(BytesIO(response.content))
                            filename = os.path.join(self.temp_dir, "images", f"{query.replace(' ', '_')}_{i}.jpg")
                            img.save(filename)
                            image_paths.append(filename)
                            
                        # For API responses (Pixabay)
                        elif response.headers.get('content-type', '').startswith('application/json'):
                            data = response.json()
                            if 'hits' in data and len(data['hits']) > 0:
                                for j, hit in enumerate(data['hits'][:num_images]):
                                    img_url = hit.get('largeImageURL') or hit.get('webformatURL')
                                    if img_url:
                                        img_response = requests.get(img_url, timeout=10)
                                        img = Image.open(BytesIO(img_response.content))
                                        filename = os.path.join(self.temp_dir, "images", f"{query.replace(' ', '_')}_{j}.jpg")
                                        img.save(filename)
                                        image_paths.append(filename)
                        
                        if len(image_paths) >= num_images:
                            successful = True
                            break
                            
                    except Exception as e:
                        if verbose:
                            print(f"⚠️ Error fetching image {i}: {e}")
                        continue
                        
            except Exception as e:
                if verbose:
                    print(f"⚠️ Error with image source: {e}")
                continue
        
        # If we couldn't get any images, create placeholder images
        if len(image_paths) == 0:
            if verbose:
                print("⚠️ Creating placeholder images")
            
            for i in range(num_images):
                # Create a colored background with text
                img = Image.new('RGB', (1600, 900), color=(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)))
                filename = os.path.join(self.temp_dir, "images", f"{query.replace(' ', '_')}_{i}.jpg")
                img.save(filename)
                image_paths.append(filename)
        
        if verbose:
            print(f"✅ Downloaded {len(image_paths)} images")
            
        return image_paths

    def fetch_videos(self, keywords, duration=10, verbose=True):
        """
        Fetch video clips based on keywords from public sources.
        
        Args:
            keywords (list): List of keywords to search for
            duration (int): Approximate desired duration in seconds
            verbose (bool): Whether to print progress
            
        Returns:
            str: Path to the downloaded video
        """
        if verbose:
            print(f"🎥 Fetching videos for keywords: {', '.join(keywords)}")
        
        query = " ".join(keywords)
        output_path = os.path.join(self.temp_dir, "videos", f"{query.replace(' ', '_')}.mp4")
        
        # Check if we already have this video
        if os.path.exists(output_path):
            if verbose:
                print("✅ Using cached video")
            return output_path
        
        # Try to fetch from YouTube
        try:
            # Search YouTube using yt-dlp
            command = f'yt-dlp ytsearch5:"{query}" --print title,duration,webpage_url --flat-playlist > search_results.txt'
            os.system(command)
            
            # Parse the results
            with open("search_results.txt", "r") as f:
                lines = f.readlines()
            
            # Find a suitable video (not too long)
            video_url = None
            for i in range(0, len(lines), 3):
                if i+2 < len(lines):
                    try:
                        title = lines[i].strip()
                        duration_str = lines[i+1].strip()
                        url = lines[i+2].strip()
                        
                        # Parse duration
                        if ':' in duration_str:
                            parts = duration_str.split(':')
                            if len(parts) == 2:  # MM:SS
                                video_duration = int(parts[0]) * 60 + int(parts[1])
                            else:  # HH:MM:SS
                                video_duration = int(parts[0]) * 3600 + int(parts[1]) * 60 + int(parts[2])
                        else:
                            video_duration = int(duration_str)
                        
                        # Select video that's not too long (< 10 mins)
                        if video_duration < 600:
                            video_url = url
                            break
                    except:
                        continue
            
            if video_url:
                if verbose:
                    print(f"🎬 Found video: {title}")
                
                # Download a short clip using yt-dlp
                command = f'yt-dlp "{video_url}" -f "best[height<=720]" --postprocessor-args "ffmpeg:-ss 0 -t {duration + 5}" -o "{output_path}"'
                os.system(command)

                # Verify the file exists and has content
                if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
                    if verbose:
                        print(f"✅ Downloaded video clip to {output_path}")
                    return output_path
        
        except Exception as e:
            if verbose:
                print(f"⚠️ Error downloading YouTube video: {e}")
        
        # Fallback: Generate a video from images
        if verbose:
            print("⚠️ Falling back to creating video from images")
        
        # Get images for the keywords
        image_paths = self.fetch_images(keywords, num_images=5, verbose=False)
        
        try:
            # Create a video from the images
            clips = []
            for img_path in image_paths:
                clip = ImageClip(img_path).set_duration(duration / len(image_paths))
                # Add a simple pan effect (Ken Burns effect)
                clip = clip.resize(height=1080).resize(lambda t: 1 + 0.05 * t)
                clips.append(clip)
            
            concat_clip = concatenate_videoclips(clips, method="compose")
            concat_clip.write_videofile(output_path, fps=24, audio=False, codec="libx264")
            
            if verbose:
                print(f"✅ Created video from images at {output_path}")
            
            return output_path
            
        except Exception as e:
            if verbose:
                print(f"❌ Error creating video from images: {e}")
            
            # Last resort: Create a colored screen video
            try:
                # Create a colored clip with text
                color_clip = ColorClip(size=(1280, 720), color=(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)))
                color_clip = color_clip.set_duration(duration)
                
                # Add text with keywords
                txt_clip = TextClip(txt=query, fontsize=60, color='white')
                txt_clip = txt_clip.set_position('center').set_duration(duration)
                
                # Combine clips
                final_clip = CompositeVideoClip([color_clip, txt_clip])
                final_clip.write_videofile(output_path, fps=24, audio=False, codec="libx264")
                
                if verbose:
                    print(f"✅ Created fallback color video at {output_path}")
                
                return output_path
                
            except Exception as e:
                if verbose:
                    print(f"❌ Error creating fallback video: {e}")
                
                # Create an empty file as a last resort
                with open(output_path, 'w') as f:
                    pass
                
                return output_path

    def generate_voiceover(self, text, tone="neutral", voice_id=None, verbose=True):
        """
        Generate AI voiceover using Kokoro TTS with dynamic tone adjustment.
        
        Args:
            text (str): Text to convert to speech
            tone (str): Emotional tone for the narration
            voice_id (str): Optional specific voice ID to use
            verbose (bool): Whether to print progress
            
        Returns:
            str: Path to the generated audio file
        """
        if verbose:
            print(f"🔊 Generating voiceover for: {text[:50]}...")
        
        # Create a unique filename based on content hash
        text_hash = str(hash(text))[:10]
        output_path = os.path.join(self.temp_dir, "audio", f"voiceover_{text_hash}.mp3")
        
        # Check if we already have this audio
        if os.path.exists(output_path):
            if verbose:
                print("✅ Using cached audio")
            return output_path
        
        # Map tone to voice parameters
        tone_params = {
            "cheerful": {"pitch": 1.1, "rate": 1.1, "voice_id": "cynthia"},
            "serious": {"pitch": 0.9, "rate": 0.95, "voice_id": "adam"},
            "excited": {"pitch": 1.2, "rate": 1.15, "voice_id": "bella"},
            "calm": {"pitch": 0.95, "rate": 0.9, "voice_id": "daniel"},
            "sad": {"pitch": 0.85, "rate": 0.9, "voice_id": "emily"},
            "neutral": {"pitch": 1.0, "rate": 1.0, "voice_id": "michael"},
            "professional": {"pitch": 1.0, "rate": 1.05, "voice_id": "olivia"}
        }
        
        # Get tone parameters or use neutral as default
        params = tone_params.get(tone.lower(), tone_params["neutral"])
        
        # Override voice_id if provided
        if voice_id:
            params["voice_id"] = voice_id
        
        try:
            # Using Hugging Face TTS API with Kokoro TTS
            response = self.hf_client.text_to_speech(
                text=text,
                model="Kokoro/Kokoro-TTS-v2"
            )
            
            # Save the audio
            with open(output_path, "wb") as f:
                f.write(response)
            
            # Adjust audio parameters using pydub
            try:
                audio = AudioSegment.from_file(output_path)
                
                # Apply pitch and rate adjustment
                # Note: Basic modification using pydub (more advanced would require librosa)
                pitch_adjusted = audio._spawn(audio.raw_data, overrides={
                    "frame_rate": int(audio.frame_rate * params["pitch"])
                })
                rate_adjusted = pitch_adjusted.set_frame_rate(audio.frame_rate)
                
                # Export the adjusted audio
                rate_adjusted.export(output_path, format="mp3")
                
            except Exception as e:
                if verbose:
                    print(f"⚠️ Error adjusting audio parameters: {e}")
                # Keep the original audio if adjustment fails
            
            if verbose:
                print(f"✅ Generated voiceover at {output_path}")
            
            return output_path
            
        except Exception as e:
            if verbose:
                print(f"❌ Error generating voiceover with Kokoro TTS: {e}")
            
            # Fallback to gTTS if available
            try:
                from gtts import gTTS
                
                tts = gTTS(text=text, lang='en', slow=False)
                tts.save(output_path)
                
                if verbose:
                    print(f"⚠️ Used fallback gTTS for voiceover at {output_path}")
                
                return output_path
                
            except Exception as e2:
                if verbose:
                    print(f"❌ Error with fallback TTS: {e2}")
                
                # Create an empty audio file
                empty_audio = AudioSegment.silent(duration=len(text.split()) * 500)  # Rough estimation of duration
                empty_audio.export(output_path, format="mp3")
                
                if verbose:
                    print(f"⚠️ Created silent audio at {output_path}")
                
                return output_path

    def process_scene(self, scene, verbose=True):
        """
        Process a single scene from the script to create audio and video.
        
        Args:
            scene (dict): Scene dictionary from the script
            verbose (bool): Whether to print progress
            
        Returns:
            tuple: (video_path, audio_path, scene_data)
        """
        if verbose:
            print(f"\n🎬 Processing Scene {scene['scene_id']}: {scene['title']}")
        
        # Extract scene information
        narration = scene['narration']
        keywords = scene['keywords']
        duration = float(scene['duration'])
        tone = scene.get('tone', 'neutral')
        visual_description = scene.get('visuals', '')
        
        # Add the visual description to the keywords for more targeted video retrieval
        enhanced_keywords = keywords.copy()
        if visual_description:
            # Extract key phrases from visual description
            visual_keywords = [w for w in visual_description.split() if len(w) > 3]
            enhanced_keywords.extend(visual_keywords[:3])  # Add up to 3 keywords from visual description
        
        # Generate voiceover with synchronization markers for later alignment
        audio_path = self.generate_voiceover(narration, tone=tone, verbose=verbose)
        
        # Get audio duration first to ensure visual content matches
        audio_clip = AudioFileClip(audio_path)
        audio_duration = audio_clip.duration
        audio_clip.close()
        
        # Fetch video based on enhanced keywords and precise duration
        video_path = self.fetch_videos(enhanced_keywords, duration=max(audio_duration, duration), verbose=verbose)
        
        # Load the video to analyze and improve it
        video_clip = VideoFileClip(video_path)
        
        # Analyze video brightness and contrast - adjust if needed
        try:
            frame = video_clip.get_frame(0)  # Get first frame
            brightness = np.mean(frame)
            
            # If video is too dark or too bright, apply correction
            if brightness < 40:  # Too dark
                video_clip = video_clip.fx(vfx.colorx, 1.3)
            elif brightness > 200:  # Too bright
                video_clip = video_clip.fx(vfx.colorx, 0.8)
        except:
            pass  # Skip if frame analysis fails
        
        # Ensure video is long enough for audio
        target_duration = max(audio_duration, duration) 
        if video_clip.duration < target_duration:
            # Instead of simple looping, use a more sophisticated approach
            if verbose:
                print(f"⚠️ Video too short, extending to {target_duration:.1f}s using advanced techniques")
            
            # If video is very short, use ping-pong technique (forward then reverse)
            if video_clip.duration < target_duration / 2:
                clip1 = video_clip
                clip2 = video_clip.fx(vfx.time_mirror)  # Play in reverse
                extended_clip = concatenate_videoclips([clip1, clip2])
                # Loop if still needed
                n_loops = int(np.ceil(target_duration / extended_clip.duration))
                video_clip = extended_clip.fx(vfx.loop, n=n_loops)
            else:
                # Use loop with subtle zoom to make it less obvious
                video_clip = video_clip.fx(vfx.loop, n=int(np.ceil(target_duration / video_clip.duration)))
                video_clip = video_clip.fx(vfx.resize, lambda t: 1 + 0.05 * np.sin(t/2))  # Subtle zoom effect
        
        # Add gentle motion to static images (if the video seems static)
        if scene.get('detected_static_image', False) or np.random.random() < 0.3:  # Random chance to add motion
            try:
                # Apply subtle Ken Burns effect (slow pan and zoom)
                start_scale, end_scale = 1.0, 1.05  # Subtle zoom in
                start_pos, end_pos = (0, 0), (10, 5)  # Subtle pan
                
                # Create transform function for zoom and pan
                def transform(get_frame, t):
                    scale = start_scale + (end_scale - start_scale) * t / video_clip.duration
                    pos_x = start_pos[0] + (end_pos[0] - start_pos[0]) * t / video_clip.duration
                    pos_y = start_pos[1] + (end_pos[1] - start_pos[1]) * t / video_clip.duration
                    
                    frame = get_frame(t)
                    h, w = frame.shape[:2]
                    
                    # Apply zoom
                    zoomed = cv2.resize(frame, None, fx=scale, fy=scale)
                    
                    # Calculate new dimensions
                    zh, zw = zoomed.shape[:2]
                    
                    # Calculate crop area
                    x1 = int(pos_x + (zw - w) / 2)
                    y1 = int(pos_y + (zh - h) / 2)
                    x2 = int(x1 + w)
                    y2 = int(y1 + h)
                    
                    # Ensure bounds
                    x1 = max(0, min(x1, zw - w))
                    y1 = max(0, min(y1, zh - h))
                    x2 = min(zw, x1 + w)
                    y2 = min(zh, y1 + h)
                    
                    # Crop
                    return zoomed[y1:y2, x1:x2]
                
                # Apply transform if it doesn't error
                try:
                    # This is a simplified approximation - in reality, we would use moviepy's proper transform
                    # functions which would require more complex setup
                    video_clip = video_clip.resize(lambda t: 1 + 0.05 * t / video_clip.duration)
                except:
                    pass
            except:
                pass  # Skip if transform fails
        
        # Trim video to match target duration
        video_clip = video_clip.subclip(0, target_duration)
        
        # Save the improved video
        improved_video_path = os.path.join(self.temp_dir, "videos", f"improved_{os.path.basename(video_path)}")
        video_clip.write_videofile(improved_video_path, codec="libx264", audio=False)
        video_clip.close()
        
        # Return scene data
        scene_data = {
            'scene_id': scene['scene_id'],
            'title': scene['title'],
            'video_path': improved_video_path,
            'audio_path': audio_path,
            'narration': narration,
            'duration': target_duration,
            'transition': scene.get('transition', 'cut'),
            'keywords': keywords,
            'visual_description': visual_description
        }
        
        if verbose:
            print(f"✅ Scene processed: {target_duration:.1f}s with enhanced visuals")
        
        return scene_data

    def get_background_music(self, duration, mood="neutral", verbose=True):
        """
        Get background music from open source repositories.
        
        Args:
            duration (float): Required duration in seconds
            mood (str): The mood of the music
            verbose (bool): Whether to print progress
            
        Returns:
            str: Path to the background music file
        """
        if verbose:
            print(f"🎵 Finding background music ({mood}, {duration:.1f}s)")
        
        output_path = os.path.join(self.temp_dir, "audio", f"background_{mood}_{int(duration)}.mp3")
        
        # Try to use a pre-defined set of free music URLs
        free_music_urls = {
            "neutral": "https://cdn.pixabay.com/download/audio/2022/01/18/audio_ba33122ff6.mp3?filename=ambient-piano-amp-strings-10711.mp3",
            "cheerful": "https://cdn.pixabay.com/download/audio/2022/04/27/audio_8c0d4a1380.mp3?filename=upbeat-uplifting-corporate-12954.mp3",
            "serious": "https://cdn.pixabay.com/download/audio/2022/01/25/audio_2b5eb3efde.mp3?filename=lifelike-126735.mp3",
            "dramatic": "https://cdn.pixabay.com/download/audio/2022/05/27/audio_f8a876107c.mp3?filename=cinematic-documentary-piano-14007.mp3",
            "inspirational": "https://cdn.pixabay.com/download/audio/2022/09/02/audio_13b3266382.mp3?filename=inspiring-cinematic-ambient-116199.mp3"
        }
        
        url = free_music_urls.get(mood.lower(), free_music_urls["neutral"])
        
        try:
            response = requests.get(url, timeout=15)
            if response.status_code == 200:
                with open(output_path, 'wb') as f:
                    f.write(response.content)
                
                # Load and adjust the music to fit the required duration
                music = AudioSegment.from_file(output_path)
                
                # If music is too short, loop it
                if len(music) < duration * 1000:  # Convert to milliseconds
                    num_loops = int(np.ceil(duration * 1000 / len(music)))
                    music = music * num_loops
                
                # Trim to required duration
                music = music[:int(duration * 1000)]
                
                # Fade in and out
                fade_duration = min(3000, int(duration * 1000 / 4))  # 3 seconds or 1/4 of total, whichever is smaller
                music = music.fade_in(fade_duration).fade_out(fade_duration)
                
                # Lower volume for background
                music = music - 12  # Reduce by 12 dB
                
                # Export the adjusted music
                music.export(output_path, format="mp3")
                
                if verbose:
                    print(f"✅ Downloaded and prepared background music")
                
                return output_path
                
        except Exception as e:
            if verbose:
                print(f"⚠️ Error getting background music: {e}")
        
        # If we failed to get music, create silent audio
        silent_audio = AudioSegment.silent(duration=int(duration * 1000))
        silent_audio.export(output_path, format="mp3")
        
        if verbose:
            print("⚠️ Created silent background track")
        
        return output_path

    def create_text_overlay(self, text, duration, position="bottom", title=False):
        """
        Create a text overlay clip for the video.
        
        Args:
            text (str): Text to display
            duration (float): Duration in seconds
            position (str): Position on screen ('top', 'bottom', 'center')
            title (bool): Whether this is a title (larger font)
            
        Returns:
            TextClip: The text overlay clip
        """
        # Set text properties based on type
        if title:
            fontsize = 60
            color = 'white'
            bg_color = 'rgba(0, 0, 0, 0.7)'
            stroke_color = 'black'
            stroke_width = 2
        else:
            fontsize = 36
            color = 'white'
            bg_color = 'rgba(0, 0, 0, 0.5)'
            stroke_color = 'black'
            stroke_width = 1
        
        # Create text clip
        txt_clip = TextClip(
            txt=text,
            fontsize=fontsize,
            color=color,
            stroke_color=stroke_color,
            stroke_width=stroke_width,
            bg_color=bg_color,
            method='caption',
            align='center',
            size=(720, None)  # Width constrained, height auto
        )
        
        # Set position
        if position == "top":
            pos = ('center', 50)
        elif position == "bottom":
            pos = ('center', 'bottom')
        else:  # center
            pos = 'center'
        
        # Set duration and position
        txt_clip = txt_clip.set_position(pos).set_duration(duration)
        
        # Add fade in/out
        fade_duration = min(1.0, duration / 4)
        txt_clip = txt_clip.fadeout(fade_duration).fadein(fade_duration)
        
        return txt_clip

    def assemble_video(self, verbose=True):
        """
        Assemble the final video from processed scenes.
        
        Args:
            verbose (bool): Whether to print progress
            
        Returns:
            str: Path to the final rendered video
        """
        if not self.script:
            raise ValueError("No script generated. Please run generate_script() first.")
        
        if verbose:
            print("\n🎞️ Assembling final video...")
        
        # Process each scene
        processed_scenes = []
        total_duration = 0
        
        for scene in tqdm(self.script['scenes'], desc="Processing scenes"):
            scene_data = self.process_scene(scene, verbose=(verbose > 1))
            processed_scenes.append(scene_data)
            total_duration += scene_data['duration']
        
        if verbose:
            print(f"✅ Processed {len(processed_scenes)} scenes, total duration: {total_duration:.1f}s")
        
        # Get background music for the entire video
        bg_music_path = self.get_background_music(total_duration, verbose=verbose)
        
        # Assemble video clips
        final_clips = []
        
        for i, scene in enumerate(processed_scenes):
            # Load video and audio for this scene
            video_clip = VideoFileClip(scene['video_path'])
            audio_clip = AudioFileClip(scene['audio_path'])
            
            # Trim video to match intended duration
            video_clip = video_clip.subclip(0, scene['duration'])
            
            # Create text overlays
            if i == 0:
                # Title overlay for first scene
                title_overlay = self.create_text_overlay(
                    self.script['title'],
                    min(5, scene['duration']),
                    position="top",
                    title=True
                )
                
                # Scene title for first scene
                scene_overlay = self.create_text_overlay(
                    scene['title'],
                    min(4, scene['duration']),
                    position="bottom",
                    title=False
                )
                
                # Combine video with overlays
                video_clip = CompositeVideoClip([
                    video_clip,
                    title_overlay,
                    scene_overlay
                ])
            else:
                # Scene title overlay
                scene_overlay = self.create_text_overlay(
                    scene['title'],
                    min(4, scene['duration']),
                    position="bottom",
                    title=False
                )
                
                # Combine video with overlay
                video_clip = CompositeVideoClip([video_clip, scene_overlay])
            
            # Set audio
            video_clip = video_clip.set_audio(audio_clip)
            
            # Add transition effect based on scene specification
            transition = scene.get('transition', 'cut').lower()
            
            if i > 0:  # Only apply transitions after the first clip
                if transition == 'fade':
                    video_clip = video_clip.fadein(1)
                elif transition == 'dissolve':
                    # We'll handle dissolve in the concatenation step
                    pass
                elif transition == 'zoom':
                    video_clip = video_clip.resize(lambda t: 1 + 0.05 * (1 - min(t, 1)))
                # 'cut' is default and needs no special handling
            
            final_clips.append(video_clip)
        
        # Concatenate clips with appropriate transitions
        if verbose:
            print("🔄 Applying transitions and concatenating clips...")
        
        # Handle different transitions for the concatenation
        transition_durations = []
        for i, scene in enumerate(processed_scenes):
            if i == 0:
                transition_durations.append(0)  # No transition for first clip
            else:
                transition = scene.get('transition', 'cut').lower()
                if transition == 'dissolve':
                    transition_durations.append(1)  # 1 second dissolve
                else:
                    transition_durations.append(0)  # No crossfade for other transitions
        
        final_video = concatenate_videoclips(
            final_clips,
            method="crossfadein",
            crossfadein=transition_durations
        )
        
        # Add background music
        if verbose:
            print("🔊 Adding background music...")
        
        bg_music = AudioFileClip(bg_music_path)
        bg_music = bg_music.subclip(0, final_video.duration)
        bg_music = bg_music.volumex(0.2)  # Lower volume for background
        
        # Mix background music with existing audio
        final_audio = CompositeAudioClip([final_video.audio, bg_music])
        final_video = final_video.set_audio(final_audio)
        
        # Add ending fade out
        final_video = final_video.fadeout(2)
        
        # Render the final video
        output_path = os.path.join(self.temp_dir, "output", f"{self.script['title'].replace(' ', '_')}.mp4")
        
        if verbose:
            print(f"💾 Rendering final video to {output_path}...")
        
        # Use high quality rendering settings
        final_video.write_videofile(
            output_path,
            fps=24,
            codec="libx264",
            audio_codec="aac",
            preset="medium",
            audio_bitrate="192k",
            bitrate="5000k"
        )
        
        if verbose:
            print("✅ Video rendering complete!")
        
        self.final_video = output_path
        return output_path

    def generate_video(self, user_prompt, verbose=True):
        """
        End-to-end function to generate a video from a user prompt.

        Args:
            user_prompt (str): The user's input describing the video they want to create
            verbose (bool): Whether to print progress (Gradio doesn't use this directly)

        Returns:
            str: Path to the final rendered video, or None if generation failed.
        """
        try:
            # Step 1: Generate script
            self.generate_script(user_prompt, verbose=verbose)

            # Step 2: Assemble and render video
            output_path = self.assemble_video(verbose=verbose)


            return output_path

        except Exception as e:
            print(f"❌ Error generating video: {e}")
            import traceback
            traceback.print_exc()
            return None


# --- Gradio Interface ---

def run_video_generation(user_prompt, progress=gr.Progress()):
    """
    Wrapper function for Gradio that integrates with the AIVideoGenerator.

    Args:
        user_prompt (str):  The user's input describing the desired video.
        progress (gr.Progress): Gradio progress object for tracking.

    Returns:
        str:  Path to the generated video file, or a message if an error occurred.
    """
    progress(0, desc="Starting video generation...")
    generator = AIVideoGenerator()
    video_path = generator.generate_video(user_prompt, verbose=False) #Verbose False

    if video_path:
        progress(1, desc="Video generation complete!")
        return video_path
    else:
         return "Video generation failed. Please check the logs for details."

# Gradio Interface Setup
if __name__ == '__main__': #add for the gradio
    with gr.Blocks(title="AI Video Generator") as demo:
        gr.Markdown(
            """
            # AI Video Generator 🎬
            Enter a topic, and let the AI create a short video for you!
            """
        )
        with gr.Row():
            with gr.Column():
                input_prompt = gr.Textbox(
                    label="What video would you like to create?",
                    placeholder="Enter a topic, e.g., 'The history of coffee', 'How to make pizza', 'The life cycle of a butterfly'",
                    lines=3
                )
                generate_button = gr.Button("Generate Video", variant="primary")
            with gr.Column():
                output_video = gr.Video(label="Generated Video", interactive=False)

        # Event handling
        generate_button.click(
            fn=run_video_generation,
            inputs=input_prompt,
            outputs=output_video
        )
        
        # Examples
        gr.Examples(
            examples=[
                ["The history of the internet"],
                ["How to train a dog"],
                ["A travel guide to Paris"],
                ["The benefits of meditation"],
                ["The future of artificial intelligence"]
            ],
            inputs=input_prompt
        )

    demo.launch()