import os
import uuid

import requests
import ffmpeg
import whisper
import librosa
import numpy as np
from transformers import pipeline
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
import gradio as gr

# Set environment variables for the Hugging Face cache
os.environ["HF_HOME"] = "/app/.cache/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/app/.cache/huggingface"

# Load the Whisper model once at startup
whisper_model = whisper.load_model("base")

# Initialize the summarizer
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")


# Define the media analysis function
def analyze_media(media_url: str, detailed: bool = True):
    """
    Analyze a video/audio file from a given CDN URL and generate a detailed PDF report.

    Args:
        media_url: URL of the video/audio file.
        detailed: Whether to include a detailed explanation in the report.
    """
    try:
        # Generate unique filenames
        unique_id = str(uuid.uuid4())
        video_path = f"temp_{unique_id}.mp4"
        audio_path = f"temp_audio_{unique_id}.wav"
        pdf_path = f"analysis_{unique_id}.pdf"

        # Download the video/audio file
        response = requests.get(media_url, stream=True)
        if response.status_code != 200:
            return "Failed to download media file."
        with open(video_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

        # Extract mono 16 kHz audio from the media
        ffmpeg.input(video_path).output(audio_path, ac=1, ar=16000).run(overwrite_output=True)

        # Load the audio
        audio_data, sample_rate = librosa.load(audio_path, sr=None)
        # Resample to 16 kHz if needed (Whisper expects 16 kHz input)
        if sample_rate != 16000:
            audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000)

        # Transcribe the audio
        result = whisper_model.transcribe(audio=np.asarray(audio_data, dtype=np.float32))
        transcription = result["text"]

        # Generate a detailed explanation (if requested)
        if detailed:
            # Dynamically set max_length based on input length (at least 30 tokens)
            input_length = len(transcription.split())
            max_length = min(1024, max(input_length * 2, 30))
            explanation = summarizer(
                transcription,
                max_length=max_length,
                min_length=min(max_length, max(10, input_length // 2)),  # Keep min_length <= max_length
                do_sample=False,
                truncation=True,  # Truncate inputs beyond the model's 1024-token limit
            )[0]["summary_text"]
        else:
            explanation = transcription

        # Create the PDF report
        pdf_generated = generate_pdf(pdf_path, transcription, explanation)

        # Clean up temporary files
        os.remove(video_path)
        os.remove(audio_path)

        # Return the PDF path to display in the Gradio app
        return pdf_generated
    except Exception as e:
        return f"Error analyzing media: {e}"


def generate_pdf(pdf_path: str, transcription: str, explanation: str):
    """
    Generate a PDF containing the transcription and detailed explanation.

    Args:
        pdf_path: Path to save the PDF.
        transcription: The transcription text.
        explanation: The detailed explanation text.
    """
    c = canvas.Canvas(pdf_path, pagesize=letter)
    width, height = letter

    # Add title
    c.setFont("Helvetica-Bold", 16)
    c.drawString(72, height - 72, "Media Analysis Report")

    # Add transcription (note: long text is not wrapped or paginated and may overflow the page)
    c.setFont("Helvetica", 12)
    c.drawString(72, height - 108, "Transcription:")
    text = c.beginText(72, height - 126)
    text.setFont("Helvetica", 10)
    for line in transcription.splitlines():
        text.textLine(line)
    c.drawText(text)

    # Add explanation
    c.setFont("Helvetica", 12)
    c.drawString(72, height - 240, "Detailed Explanation:")
    text = c.beginText(72, height - 258)
    text.setFont("Helvetica", 10)
    for line in explanation.splitlines():
        text.textLine(line)
    c.drawText(text)

    c.save()
    # Return the path so analyze_media can hand the file to Gradio
    return pdf_path


# Create the Gradio interface
interface = gr.Interface(
    fn=analyze_media,
    inputs=[
        gr.Textbox(label="Media URL", placeholder="Enter the URL of the video/audio file"),
        gr.Checkbox(label="Detailed Analysis", value=True),
    ],
    outputs=gr.File(label="Generated PDF Report"),  # Displays the PDF as a downloadable file
    title="Media Analyzer",
    description="Provide a video/audio URL, and the app will analyze the content and generate a detailed PDF report.",
)

# Launch the interface
if __name__ == "__main__":
    interface.launch()