"""Gradio front end that summarizes a YouTube video from its captions."""

import logging
import re
from typing import Optional, Tuple

import gradio as gr

from youtube_utils import get_youtube_captions, summarize_large_text_with_bart

logger = logging.getLogger(__name__)

# An 11-character video ID that is NOT followed by another ID character, so a
# longer token (e.g. a channel or playlist ID) is rejected instead of being
# silently truncated to its first 11 characters.
_VIDEO_ID = r"([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])"

# Each prefix anchors the ID to a context in which YouTube actually places a
# video ID. A bare "/" is deliberately not accepted as a prefix: it would make
# any 11+ character path segment (e.g. "/feed/subscriptions") look like an ID.
_VIDEO_ID_PATTERNS = tuple(
    re.compile(prefix + _VIDEO_ID)
    for prefix in (
        r"[?&]v=",                       # Watch URLs: ...?v=<id> / ...&v=<id>
        r"youtu\.be/",                   # Shortened URLs
        r"/(?:embed|shorts|live|v|e)/",  # Embed and other path-style URLs
    )
)


def extract_video_id(url: Optional[str]) -> Optional[str]:
    """
    Extract video ID from YouTube URLs using regular expressions.

    Args:
        url: The URL text to inspect. ``None`` or an empty string is treated
            as "no video ID".

    Returns:
        The 11-character video ID captured by the first matching pattern, or
        ``None`` when no pattern matches.

    Examples:
        >>> extract_video_id("https://youtu.be/dQw4w9WgXcQ")
        'dQw4w9WgXcQ'
        >>> extract_video_id("https://www.youtube.com/feed/subscriptions") is None
        True
    """
    if not url:
        return None

    for pattern in _VIDEO_ID_PATTERNS:
        match = pattern.search(url)
        if match:
            return match.group(1)
    return None


def summarize_youtube_video(video_url: str) -> Tuple[str, Optional[str]]:
    """
    Summarize YouTube video by extracting captions and applying summarization.

    Args:
        video_url: The URL entered by the user.

    Returns:
        A ``(text, link)`` pair. On success ``text`` is the value returned by
        ``summarize_large_text_with_bart`` and ``link`` is the canonical watch
        URL built from the extracted video ID. On any failure ``text`` is a
        user-facing error message and ``link`` is ``None``.
    """
    try:
        # Extract video ID
        video_id = extract_video_id(video_url)
        if not video_id:
            return "Invalid YouTube URL. Please try again.", None

        # Fetch captions
        captions = get_youtube_captions(video_id)
        if not captions:
            return "Unable to fetch captions for the video. Ensure the video has subtitles.", None

        # Generate summary
        summary = summarize_large_text_with_bart(captions)
        return summary, f"https://www.youtube.com/watch?v={video_id}"
    except Exception as e:
        # UI boundary: the user gets a readable message instead of a crash,
        # but the traceback is logged so the failure is not lost.
        logger.exception("Failed to summarize video URL %r", video_url)
        return f"An error occurred: {str(e)}", None


# Gradio Interface
def create_gradio_interface() -> gr.Blocks:
    """
    Create and return the Gradio Blocks interface.

    The layout is a URL textbox with a "Summarize" button, followed by two
    read-only textboxes for the summary and the video link. Clicking the
    button calls ``summarize_youtube_video`` with the URL textbox value and
    writes its two return values to the two output textboxes.
    """
    with gr.Blocks(title="YouTube Video Summarizer") as interface:
        gr.Markdown("# 🎥 YouTube Video Summarizer")
        gr.Markdown(
            "Provide a YouTube video URL, and this tool will extract captions and summarize the content using a BART model."
        )

        with gr.Row():
            video_url_input = gr.Textbox(
                label="YouTube Video URL",
                placeholder="Enter the URL of the YouTube video"
            )
            summarize_button = gr.Button("Summarize")

        with gr.Row():
            summary_output = gr.Textbox(label="Summary", lines=10, interactive=False)
            video_link_output = gr.Textbox(label="YouTube Video Link", interactive=False)

        summarize_button.click(
            fn=summarize_youtube_video,
            inputs=[video_url_input],
            outputs=[summary_output, video_link_output]
        )

    return interface


# Launch the interface
if __name__ == "__main__":
    interface = create_gradio_interface()
    interface.launch()