import os

# Set the Hugging Face cache location before transformers is imported,
# so the writable /app cache is picked up when the library initializes.
os.environ["HF_HOME"] = "/app/.cache/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/app/.cache/huggingface"

import textwrap
import uuid

import requests
import ffmpeg
import whisper
import librosa
import numpy as np
from transformers import pipeline
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
import gradio as gr
# Load the Whisper model once at startup
whisper_model = whisper.load_model("base")
# Initialize the summarizer
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
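# bart-large-cnn is an English-only summarizer fine-tuned on CNN/DailyMail articles.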
# Define the media analysis function
def analyze_media(media_url: str, detailed: bool = True):
"""
Analyze a video/audio from a given CDN URL and generate a detailed PDF report.
Args:
media_url: URL of the video/audio file.
detailed: Whether to include detailed explanations in the report.
"""
try:
# Generate unique filenames
unique_id = str(uuid.uuid4())
video_path = f"temp_{unique_id}.mp4"
audio_path = f"temp_audio_{unique_id}.wav"
pdf_path = f"analysis_{unique_id}.pdf"
# Download the video/audio file
        response = requests.get(media_url, stream=True, timeout=60)
        if response.status_code != 200:
            raise gr.Error(f"Failed to download media file (HTTP {response.status_code}).")
with open(video_path, "wb") as f:
for chunk in response.iter_content(chunk_size=8192):
f.write(chunk)
# Extract audio from the media
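        # (ac=1, ar=16000: mono audio at 16 kHz, the sample rate Whisper expects)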
ffmpeg.input(video_path).output(audio_path, ac=1, ar=16000).run(overwrite_output=True)
# Load and transcribe the audio
audio_data, sample_rate = librosa.load(audio_path, sr=None)
# Resample audio to 16 kHz if needed
if sample_rate != 16000:
audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000)
# Transcribe the audio
        result = whisper_model.transcribe(audio=audio_data.astype(np.float32))
transcription = result["text"]
        # Generate a summarized explanation (if requested)
        if detailed:
            # Scale the summary length to the length of the transcription
            input_length = len(transcription.split())
            max_length = min(1024, max(input_length * 2, 30))  # At least 30, capped at BART's limit
            min_length = min(max(10, input_length // 2), max_length // 2)  # Keep min below max
            explanation = summarizer(
                transcription,
                max_length=max_length,
                min_length=min_length,
                do_sample=False,
                truncation=True,  # Truncate inputs longer than the model's 1024-token window
            )[0]["summary_text"]
else:
explanation = transcription
        # Create the PDF report
        generate_pdf(pdf_path, transcription, explanation)
        # Return the PDF path so Gradio can offer it as a download
        return pdf_path
    except Exception as e:
        raise gr.Error(f"Error analyzing media: {e}")
    finally:
        # Remove temporary media files whether or not the analysis succeeded
        for path in (video_path, audio_path):
            if os.path.exists(path):
                os.remove(path)
def generate_pdf(pdf_path: str, transcription: str, explanation: str):
"""
Generate a PDF containing the transcription and detailed explanation.
Args:
pdf_path: Path to save the PDF.
transcription: The transcription text.
explanation: The detailed explanation text.
"""
c = canvas.Canvas(pdf_path, pagesize=letter)
width, height = letter
# Add Title
c.setFont("Helvetica-Bold", 16)
c.drawString(72, height - 72, "Media Analysis Report")
    # Add Transcription, wrapped so long lines stay inside the page margins
    c.setFont("Helvetica", 12)
    c.drawString(72, height - 108, "Transcription:")
    text = c.beginText(72, height - 126)
    text.setFont("Helvetica", 10)
    for line in textwrap.wrap(transcription, width=90):  # roughly the printable width at 10 pt Helvetica
        text.textLine(line)
    c.drawText(text)
    # Add Explanation just below where the transcription ended
    # (very long transcriptions can still overflow a single page)
    y = text.getY() - 24
    c.setFont("Helvetica", 12)
    c.drawString(72, y, "Detailed Explanation:")
    text = c.beginText(72, y - 18)
    text.setFont("Helvetica", 10)
    for line in textwrap.wrap(explanation, width=90):
        text.textLine(line)
    c.drawText(text)
c.save()
# Create a Gradio interface
interface = gr.Interface(
fn=analyze_media,
inputs=[
gr.Textbox(label="Media URL", placeholder="Enter the URL of the video/audio file"),
gr.Checkbox(label="Detailed Analysis", value=True),
],
outputs=gr.File(label="Generated PDF Report"), # Displays the PDF as a downloadable file
title="Media Analyzer",
    description="Provide a video/audio URL and the app will transcribe the content and generate a detailed PDF report.",
)
# Launch the interface
if __name__ == "__main__":
interface.launch()
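
# Minimal local test sketch (assumes a reachable sample media URL; run manually rather than on import):
#
#     report_path = analyze_media("https://example.com/sample.mp4", detailed=True)
#     print("PDF written to", report_path)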