# Spaces: Sleeping
import os
import uuid

# Point the Hugging Face cache at /app/.cache/huggingface.
# These assignments must run BEFORE transformers (or anything else that pulls
# in huggingface_hub) is imported: those libraries read the variables once, at
# import time, so setting them afterwards leaves the default cache in use.
os.environ["HF_HOME"] = "/app/.cache/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/app/.cache/huggingface"

import requests  # noqa: E402  (imports intentionally follow the env setup)
import ffmpeg  # noqa: E402
import whisper  # noqa: E402
import librosa  # noqa: E402
import numpy as np  # noqa: E402
from transformers import pipeline  # noqa: E402
from reportlab.lib.pagesizes import letter  # noqa: E402
from reportlab.pdfgen import canvas  # noqa: E402
import gradio as gr  # noqa: E402

# Load the Whisper model once at startup so every request reuses it
whisper_model = whisper.load_model("base")

# Initialize the summarizer once at startup for the same reason
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# Define the media analysis function
def analyze_media(media_url: str, detailed: bool = True):
    """
    Analyze a video/audio file from a URL and generate a PDF report.

    The media is downloaded to a temporary file, its audio track is extracted
    with ffmpeg (mono, 16 kHz), transcribed with the module-level Whisper
    model and, when requested, summarized with the module-level summarizer.
    The result is written to a PDF via ``generate_pdf``.

    Args:
        media_url: URL of the video/audio file.
        detailed: Whether to include a generated summary in the report. When
            False (or when nothing was transcribed) the transcription itself
            is used as the explanation text.

    Returns:
        The path of the generated PDF on success, or an error message string
        on failure.
    """
    # Generate unique filenames so concurrent requests do not collide
    unique_id = str(uuid.uuid4())
    video_path = f"temp_{unique_id}.mp4"
    audio_path = f"temp_audio_{unique_id}.wav"
    pdf_path = f"analysis_{unique_id}.pdf"

    # NOTE(review): the Gradio output for this function is a gr.File, so the
    # error strings returned below are unlikely to be shown as messages.
    # Raising gr.Error would surface them properly, but that changes the
    # return contract, so it is left for a follow-up.
    try:
        # Download the video/audio file. The timeout keeps a stalled server
        # from hanging the request forever; the context manager releases the
        # connection even on early return.
        with requests.get(media_url, stream=True, timeout=30) as response:
            if response.status_code != 200:
                return "Failed to download media file."
            with open(video_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        # Extract a mono 16 kHz audio track from the media
        ffmpeg.input(video_path).output(audio_path, ac=1, ar=16000).run(overwrite_output=True)

        # Load the audio at 16 kHz (librosa resamples on load if needed)
        audio_data, _ = librosa.load(audio_path, sr=16000)

        # Transcribe the audio
        result = whisper_model.transcribe(audio=audio_data)
        transcription = result["text"]

        # Generate the summary only when asked for and when there is text
        if detailed and transcription.strip():
            # Scale the summary bounds with the input size, and keep
            # min_length strictly below max_length so generation is valid.
            input_length = len(transcription.split())
            max_length = min(1024, max(input_length * 2, 30))
            min_length = min(max(10, input_length // 2), max_length - 1)
            explanation = summarizer(
                transcription,
                max_length=max_length,
                min_length=min_length,
                do_sample=False,
                # Long transcripts can exceed the model's input limit;
                # truncate the input instead of failing.
                truncation=True,
            )[0]["summary_text"]
        else:
            explanation = transcription

        # Create the PDF and hand its path back to Gradio. The path is
        # returned directly rather than relying on generate_pdf's return value.
        generate_pdf(pdf_path, transcription, explanation)
        return pdf_path
    except Exception as e:
        return f"Error analyzing media: {e}"
    finally:
        # Clean up temporary files on success and on failure alike
        for temp_path in (video_path, audio_path):
            if os.path.exists(temp_path):
                os.remove(temp_path)
def generate_pdf(pdf_path: str, transcription: str, explanation: str):
    """
    Generate a PDF containing the transcription and detailed explanation.

    Body text is wrapped to the printable width and flows onto additional
    pages as needed, so long transcripts neither run off the page nor
    overlap the explanation section.

    Args:
        pdf_path: Path to save the PDF.
        transcription: The transcription text.
        explanation: The detailed explanation text.

    Returns:
        ``pdf_path``, so callers can pass the result straight on.
    """
    # Local import: keeps this function self-contained without touching the
    # file-level import block.
    from reportlab.lib.utils import simpleSplit

    page_width, page_height = letter
    margin = 72
    body_font, body_size, line_height = "Helvetica", 10, 14
    heading_gap = 18
    usable_width = page_width - 2 * margin

    c = canvas.Canvas(pdf_path, pagesize=letter)
    y = page_height - margin

    # Add Title
    c.setFont("Helvetica-Bold", 16)
    c.drawString(margin, y, "Media Analysis Report")
    y -= 36

    sections = (
        ("Transcription:", transcription),
        ("Detailed Explanation:", explanation),
    )
    for heading, body in sections:
        # Start a new page if the heading plus one body line would not fit
        if y - heading_gap - line_height < margin:
            c.showPage()
            y = page_height - margin

        c.setFont("Helvetica", 12)
        c.drawString(margin, y, heading)
        y -= heading_gap

        c.setFont(body_font, body_size)
        # simpleSplit honours existing newlines and wraps by measured width
        for line in simpleSplit(body or "", body_font, body_size, usable_width):
            if y < margin:
                c.showPage()
                # showPage resets the graphics state, including the font
                c.setFont(body_font, body_size)
                y = page_height - margin
            c.drawString(margin, y, line)
            y -= line_height

        # Blank space before the next section
        y -= heading_gap

    c.save()
    return pdf_path
# Build the Gradio UI: a URL box and a detail toggle go in, a PDF file comes out
media_url_box = gr.Textbox(
    label="Media URL",
    placeholder="Enter the URL of the video/audio file",
)
detail_toggle = gr.Checkbox(label="Detailed Analysis", value=True)
# Displays the PDF as a downloadable file
report_file = gr.File(label="Generated PDF Report")

interface = gr.Interface(
    fn=analyze_media,
    inputs=[media_url_box, detail_toggle],
    outputs=report_file,
    title="Media Analyzer",
    description="Upload a video/audio URL, and the app will analyze the content and provide a detailed PDF report.",
)

# Start the server only when this file is executed as a script
if __name__ == "__main__":
    interface.launch()