peace2024's picture
Update app.py
8a44556 verified
raw
history blame
4.11 kB
import os
import uuid

# Configure the Hugging Face cache location BEFORE importing `transformers`.
# The library resolves its cache directory from these variables when it is
# first imported, so assigning them after the import (as this file used to)
# leaves the default cache path in effect.
os.environ["HF_HOME"] = "/app/.cache/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/app/.cache/huggingface"

# Third-party imports intentionally follow the environment setup above.
import ffmpeg  # noqa: E402
import gradio as gr  # noqa: E402
import librosa  # noqa: E402
import numpy as np  # noqa: E402
import requests  # noqa: E402
import whisper  # noqa: E402
from reportlab.lib.pagesizes import letter  # noqa: E402
from reportlab.pdfgen import canvas  # noqa: E402
from transformers import pipeline  # noqa: E402
# Define the media analysis function
def analyze_media(media_url: str, detailed: bool = True):
    """
    Analyze a video/audio file from a URL and generate a PDF report.

    The media is downloaded to a temporary file, its audio track is extracted
    as 16 kHz mono WAV with ffmpeg, transcribed with the Whisper "base" model
    and, when requested, summarized with ``facebook/bart-large-cnn``. The
    transcription and the explanation are then written to a PDF.

    Args:
        media_url: URL of the video/audio file.
        detailed: Whether to include a generated summary in the report. When
            False (or when the transcription is empty) the transcription
            itself is used as the explanation.

    Returns:
        The path of the generated PDF on success, otherwise a human-readable
        error message string.
    """
    # Unique per-call filenames so concurrent requests do not collide.
    unique_id = str(uuid.uuid4())
    video_path = f"temp_{unique_id}.mp4"
    audio_path = f"temp_audio_{unique_id}.wav"
    pdf_path = f"analysis_{unique_id}.pdf"

    # NOTE(review): errors are reported by returning a string, which is kept
    # for backward compatibility; confirm that the UI component receiving the
    # result handles a non-path string the way you expect.
    try:
        # NOTE(review): media_url is user-supplied and fetched server-side;
        # consider restricting the allowed schemes/hosts.
        # The timeout keeps a stalled server from hanging the request forever,
        # and the context manager releases the connection on every path.
        with requests.get(media_url, stream=True, timeout=30) as response:
            if response.status_code != 200:
                return "Failed to download media file."
            with open(video_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        # Extract a mono, 16 kHz audio track from the downloaded media.
        ffmpeg.input(video_path).output(audio_path, ac=1, ar=16000).run(overwrite_output=True)

        # Load the audio at its native rate and resample only if necessary.
        model = whisper.load_model("base")
        audio_data, sample_rate = librosa.load(audio_path, sr=None)
        if sample_rate != 16000:
            audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000)

        result = model.transcribe(audio=np.array(audio_data))
        transcription = result["text"]

        # Summarize only when there is something to summarize; truncation
        # keeps long transcripts within the model's maximum input length
        # instead of failing inside the model.
        if detailed and transcription.strip():
            summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
            explanation = summarizer(
                transcription,
                max_length=1024,
                min_length=256,
                do_sample=False,
                truncation=True,
            )[0]["summary_text"]
        else:
            explanation = transcription

        generate_pdf(pdf_path, transcription, explanation)
        return pdf_path
    except Exception as e:
        return f"Error analyzing media: {e}"
    finally:
        # Remove the temporary media files on success AND on failure. A file
        # may not exist if the failure happened before it was created, so a
        # failed removal is deliberately ignored.
        for temp_path in (video_path, audio_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass
def _wrap_line(c, line, font_name, font_size, max_width):
    """Greedily split *line* into pieces no wider than *max_width* points.

    Splitting happens on whitespace only, so a single word wider than
    *max_width* is kept intact on its own line.
    """
    words = line.split()
    if not words:
        # Preserve blank lines as vertical spacing.
        return [""]
    pieces = []
    current = words[0]
    for word in words[1:]:
        candidate = f"{current} {word}"
        if c.stringWidth(candidate, font_name, font_size) <= max_width:
            current = candidate
        else:
            pieces.append(current)
            current = word
    pieces.append(current)
    return pieces


def _draw_section(c, heading, body, y, margin, max_width, page_height):
    """Draw *heading* and the wrapped *body* starting at *y*; return the next free y."""
    # Start a new page rather than leaving a heading alone at the bottom.
    if y - 18 < margin:
        c.showPage()
        y = page_height - margin
    c.setFont("Helvetica", 12)
    c.drawString(margin, y, heading)
    y -= 18

    c.setFont("Helvetica", 10)
    for raw_line in body.splitlines():
        for piece in _wrap_line(c, raw_line, "Helvetica", 10, max_width):
            if y < margin:
                c.showPage()
                # Font settings do not carry over to a new page.
                c.setFont("Helvetica", 10)
                y = page_height - margin
            c.drawString(margin, y, piece)
            y -= 12
    # Leave a gap before whatever follows this section.
    return y - 12


def generate_pdf(pdf_path: str, transcription: str, explanation: str):
    """
    Generate a PDF containing the transcription and detailed explanation.

    Text is wrapped to the printable width and flows onto additional pages as
    needed, so long transcripts neither run off the page nor overlap the
    explanation section.

    Args:
        pdf_path: Path to save the PDF.
        transcription: The transcription text.
        explanation: The detailed explanation text.
    """
    c = canvas.Canvas(pdf_path, pagesize=letter)
    width, height = letter
    margin = 72  # one inch, in points
    max_width = width - 2 * margin

    # Title
    y = height - margin
    c.setFont("Helvetica-Bold", 16)
    c.drawString(margin, y, "Media Analysis Report")
    y -= 36

    # Sections are laid out one after another from a running y cursor.
    sections = (
        ("Transcription:", transcription),
        ("Detailed Explanation:", explanation),
    )
    for heading, body in sections:
        y = _draw_section(c, heading, body, y, margin, max_width, height)

    c.save()
# Gradio UI: a URL box and a "detailed" toggle in, a downloadable PDF out.
interface = gr.Interface(
    title="Media Analyzer",
    description="Upload a video/audio URL, and the app will analyze the content and provide a detailed PDF report.",
    fn=analyze_media,
    inputs=[
        gr.Textbox(
            label="Media URL",
            placeholder="Enter the URL of the video/audio file",
        ),
        gr.Checkbox(
            label="Detailed Analysis",
            value=True,
        ),
    ],
    outputs=gr.File(label="Generated PDF Report"),
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch(share=True)