import os

# Set the Hugging Face cache location before transformers is imported,
# so the writable /app cache is picked up when the library initializes.
os.environ["HF_HOME"] = "/app/.cache/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/app/.cache/huggingface"

import textwrap
import uuid

import requests
import ffmpeg
import whisper
import librosa
import numpy as np
from transformers import pipeline
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
import gradio as gr
# Load the Whisper model once at startup
whisper_model = whisper.load_model("base")
# Initialize the summarizer
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
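# bart-large-cnn is an English-only summarizer fine-tuned on CNN/DailyMail articles.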
# Define the media analysis function
def analyze_media(media_url: str, detailed: bool = True):
"""
Analyze a video/audio from a given CDN URL and generate a detailed PDF report.
Args:
media_url: URL of the video/audio file.
detailed: Whether to include detailed explanations in the report.
"""
try:
# Generate unique filenames
unique_id = str(uuid.uuid4())
video_path = f"temp_{unique_id}.mp4"
audio_path = f"temp_audio_{unique_id}.wav"
pdf_path = f"analysis_{unique_id}.pdf"
# Download the video/audio file
        response = requests.get(media_url, stream=True, timeout=60)
        if response.status_code != 200:
            raise gr.Error(f"Failed to download media file (HTTP {response.status_code}).")
with open(video_path, "wb") as f:
for chunk in response.iter_content(chunk_size=8192):
f.write(chunk)
# Extract audio from the media
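        # (ac=1, ar=16000: mono audio at 16 kHz, the sample rate Whisper expects)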
ffmpeg.input(video_path).output(audio_path, ac=1, ar=16000).run(overwrite_output=True)
# Load and transcribe the audio
audio_data, sample_rate = librosa.load(audio_path, sr=None)
# Resample audio to 16 kHz if needed
if sample_rate != 16000:
audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000)
# Transcribe the audio
        result = whisper_model.transcribe(audio=audio_data.astype(np.float32))
transcription = result["text"]
        # Generate a summarized explanation (if requested)
        if detailed:
            # Scale the summary length to the length of the transcription
            input_length = len(transcription.split())
            max_length = min(1024, max(input_length * 2, 30))  # At least 30, capped at BART's limit
            min_length = min(max(10, input_length // 2), max_length // 2)  # Keep min below max
            explanation = summarizer(
                transcription,
                max_length=max_length,
                min_length=min_length,
                do_sample=False,
                truncation=True,  # Truncate inputs longer than the model's 1024-token window
            )[0]["summary_text"]
else:
explanation = transcription
        # Create the PDF report
        generate_pdf(pdf_path, transcription, explanation)
        # Return the PDF path so Gradio can offer it as a download
        return pdf_path
    except Exception as e:
        raise gr.Error(f"Error analyzing media: {e}")
    finally:
        # Remove temporary media files whether or not the analysis succeeded
        for path in (video_path, audio_path):
            if os.path.exists(path):
                os.remove(path)
def generate_pdf(pdf_path: str, transcription: str, explanation: str):
"""
Generate a PDF containing the transcription and detailed explanation.
Args:
pdf_path: Path to save the PDF.
transcription: The transcription text.
explanation: The detailed explanation text.
"""
c = canvas.Canvas(pdf_path, pagesize=letter)
width, height = letter
# Add Title
c.setFont("Helvetica-Bold", 16)
c.drawString(72, height - 72, "Media Analysis Report")
    # Add Transcription, wrapped so long lines stay inside the page margins
    c.setFont("Helvetica", 12)
    c.drawString(72, height - 108, "Transcription:")
    text = c.beginText(72, height - 126)
    text.setFont("Helvetica", 10)
    for line in textwrap.wrap(transcription, width=90):  # roughly the printable width at 10 pt Helvetica
        text.textLine(line)
    c.drawText(text)
    # Add Explanation just below where the transcription ended
    # (very long transcriptions can still overflow a single page)
    y = text.getY() - 24
    c.setFont("Helvetica", 12)
    c.drawString(72, y, "Detailed Explanation:")
    text = c.beginText(72, y - 18)
    text.setFont("Helvetica", 10)
    for line in textwrap.wrap(explanation, width=90):
        text.textLine(line)
    c.drawText(text)
c.save()
# Create a Gradio interface
interface = gr.Interface(
fn=analyze_media,
inputs=[
gr.Textbox(label="Media URL", placeholder="Enter the URL of the video/audio file"),
gr.Checkbox(label="Detailed Analysis", value=True),
],
outputs=gr.File(label="Generated PDF Report"), # Displays the PDF as a downloadable file
title="Media Analyzer",
    description="Provide a video/audio URL and the app will transcribe the content and generate a detailed PDF report.",
)
# Launch the interface
if __name__ == "__main__":
interface.launch()
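
# Minimal local test sketch (assumes a reachable sample media URL; run manually rather than on import):
#
#     report_path = analyze_media("https://example.com/sample.mp4", detailed=True)
#     print("PDF written to", report_path)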