Spaces:

peace2024
/

video-analyser-v2

Sleeping

App Files Files Community

video-analyser-v2 / app.py

peace2024

Update app.py

8a44556 verified 9 months ago

raw

history blame

4.11 kB

	import os
	import uuid

	# Configure the Hugging Face cache location BEFORE any third-party import.
	# The Hugging Face libraries (pulled in by transformers and gradio) resolve
	# their cache directories when they are first imported, so assigning these
	# variables after the imports would leave the default cache path in effect.
	os.environ["HF_HOME"] = "/app/.cache/huggingface"
	# Kept alongside HF_HOME for transformers releases that still read it.
	os.environ["TRANSFORMERS_CACHE"] = "/app/.cache/huggingface"

	# Third-party imports intentionally follow the environment setup above.
	import ffmpeg  # noqa: E402
	import gradio as gr  # noqa: E402
	import librosa  # noqa: E402
	import numpy as np  # noqa: E402
	import requests  # noqa: E402
	import whisper  # noqa: E402
	from reportlab.lib.pagesizes import letter  # noqa: E402
	from reportlab.pdfgen import canvas  # noqa: E402
	from transformers import pipeline  # noqa: E402

	# Define the media analysis function
	def analyze_media(media_url: str, detailed: bool = True):
	    """
	    Download media from a URL, transcribe its audio and write a PDF report.

	    Args:
	        media_url: URL of the video/audio file to download.
	        detailed: When true, the report's explanation section is a
	            summary of the transcription produced by the
	            "facebook/bart-large-cnn" summarization pipeline; otherwise
	            the transcription itself is reused as the explanation.

	    Returns:
	        The path of the generated PDF on success. On failure a
	        human-readable error string is returned instead of raising.
	    """
	    # Unique names so concurrent requests never share temporary files.
	    unique_id = str(uuid.uuid4())
	    video_path = f"temp_{unique_id}.mp4"
	    audio_path = f"temp_audio_{unique_id}.wav"
	    pdf_path = f"analysis_{unique_id}.pdf"

	    try:
	        # Stream the download to disk. The timeout (connect, read) keeps a
	        # stalled server from hanging the request forever, and the context
	        # manager releases the connection even on an early return.
	        with requests.get(media_url, stream=True, timeout=(10, 60)) as response:
	            if response.status_code != 200:
	                return "Failed to download media file."
	            with open(video_path, "wb") as f:
	                for chunk in response.iter_content(chunk_size=8192):
	                    f.write(chunk)

	        # Extract a mono 16 kHz audio track from the media.
	        ffmpeg.input(video_path).output(audio_path, ac=1, ar=16000).run(overwrite_output=True)

	        # Load the audio at 16 kHz; librosa resamples only if the file's
	        # native rate differs, so no separate resampling step is needed.
	        model = whisper.load_model("base")
	        audio_data, _ = librosa.load(audio_path, sr=16000)

	        # Transcribe the audio
	        result = model.transcribe(audio=audio_data)
	        transcription = result["text"]

	        # Summarize only when requested and when there is text to summarize.
	        if detailed and transcription.strip():
	            summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
	            # truncation=True clips transcripts that exceed the model's
	            # maximum input length instead of raising.
	            explanation = summarizer(
	                transcription,
	                max_length=1024,
	                min_length=256,
	                do_sample=False,
	                truncation=True,
	            )[0]["summary_text"]
	        else:
	            explanation = transcription

	        # Create a PDF
	        generate_pdf(pdf_path, transcription, explanation)

	        # Return the PDF
	        return pdf_path
	    except Exception as e:
	        # Top-level boundary for the UI callback: report instead of raising.
	        return f"Error analyzing media: {e}"
	    finally:
	        # Remove the temporary files on success AND on failure.
	        for temp_path in (video_path, audio_path):
	            try:
	                if os.path.exists(temp_path):
	                    os.remove(temp_path)
	            except OSError:
	                # Best-effort cleanup; a leftover temp file must not mask
	                # the actual result of the analysis.
	                pass


	def generate_pdf(pdf_path: str, transcription: str, explanation: str):
	    """
	    Generate a PDF containing the transcription and detailed explanation.

	    Text is wrapped to the printable width and flows onto additional pages
	    as needed, so long transcripts neither run off the page edge nor
	    overlap the explanation section.

	    Args:
	        pdf_path: Path to save the PDF.
	        transcription: The transcription text.
	        explanation: The detailed explanation text.
	    """
	    # Local import: only this function needs the width-aware line splitter.
	    from reportlab.lib.utils import simpleSplit

	    c = canvas.Canvas(pdf_path, pagesize=letter)
	    width, height = letter
	    margin = 72
	    max_width = width - 2 * margin
	    y = height - margin  # baseline of the next line to draw

	    def draw_line(text, font_name, font_size, step):
	        """Draw one line at the current position, starting a new page if full."""
	        nonlocal y
	        if y < margin:
	            c.showPage()
	            y = height - margin
	        # Set the font for every line: a new page starts with default state.
	        c.setFont(font_name, font_size)
	        c.drawString(margin, y, text)
	        y -= step

	    # Add Title
	    draw_line("Media Analysis Report", "Helvetica-Bold", 16, 36)

	    sections = (
	        ("Transcription:", transcription),
	        ("Detailed Explanation:", explanation),
	    )
	    for heading, body in sections:
	        draw_line(heading, "Helvetica", 12, 18)
	        for paragraph in (body or "").splitlines():
	            # simpleSplit returns no lines for a blank paragraph; keep the
	            # blank line so paragraph breaks in the text stay visible.
	            wrapped = simpleSplit(paragraph, "Helvetica", 10, max_width) or [""]
	            for line in wrapped:
	                draw_line(line, "Helvetica", 10, 12)
	        y -= 12  # gap before the next section

	    c.save()

	# Build the Gradio UI: a URL box and a "detailed" toggle feed
	# analyze_media, whose result is shown as a downloadable file.
	media_url_box = gr.Textbox(
	    label="Media URL",
	    placeholder="Enter the URL of the video/audio file",
	)
	detailed_toggle = gr.Checkbox(label="Detailed Analysis", value=True)
	report_download = gr.File(label="Generated PDF Report")

	interface = gr.Interface(
	    fn=analyze_media,
	    inputs=[media_url_box, detailed_toggle],
	    outputs=report_download,
	    title="Media Analyzer",
	    description="Upload a video/audio URL, and the app will analyze the content and provide a detailed PDF report.",
	)

	# Start the app only when this file is run as a script, not on import.
	if __name__ == "__main__":
	    interface.launch(share=True)