Spaces:

sam-mata
/

Lecture-Transcriber

Running

App Files Files Community

Lecture-Transcriber / app.py

sam-mata

Style Update

1f9d625 5 months ago

raw

history blame contribute delete

3.32 kB

	import gradio as gr
	import replicate
	import os
	import tempfile
	from moviepy import VideoFileClip

	# Replicate API token taken from the process environment (None when unset).
	REPLICATE_API_TOKEN = os.environ.get("REPLICATE_API_TOKEN")

	def _transcript_from_output(output):
	    """Flatten a transcription response into one plain-text string.

	    Args:
	        output: The value returned by the model run. Either a dict that may
	            hold a "segments" list and/or a "text" entry, or directly an
	            iterable of segment dicts that each have a "text" key.

	    Returns:
	        The segment texts joined by single spaces, else the dict's "text"
	        entry, else the placeholder "No transcription found.".
	    """
	    segments = output.get("segments") if isinstance(output, dict) else output
	    if segments:
	        return " ".join(seg["text"] for seg in segments)
	    # Only a dict can carry a "text" fallback; anything else (None, empty
	    # list, ...) has nothing to read and must not be called with .get().
	    if isinstance(output, dict):
	        return output.get("text", "No transcription found.")
	    return "No transcription found."


	def process_video(video_file):
	    """Extract the audio track of a video and return its transcription.

	    The audio is written to a temporary .mp3 file, sent to the WhisperX
	    model hosted on Replicate, and the temporary file is removed again
	    whether or not the run succeeds.

	    Args:
	        video_file: Path of the uploaded video, or a falsy value when
	            nothing was uploaded.

	    Returns:
	        The transcript text, or a human-readable error message when no file
	        was given or the audio could not be extracted.

	    Raises:
	        Exception: Errors raised by the Replicate client are not caught
	            here and propagate to the caller.
	    """
	    if not video_file:
	        return "No video file uploaded."

	    temp_audio_file = None
	    try:
	        # Only reserve a unique path here; the handle is closed immediately
	        # so the audio writer can open the path on its own.
	        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
	            temp_audio_file = temp_audio.name

	        try:
	            # The context manager closes the clip (and its readers) even
	            # when writing the audio fails.
	            with VideoFileClip(video_file) as video:
	                if video.audio is None:
	                    return "Failed to extract audio from video: no audio track found."
	                video.audio.write_audiofile(temp_audio_file, logger=None)
	        except Exception as e:
	            return f"Failed to extract audio from video: {e}"

	        # The token is a client setting, not a prediction parameter.
	        client = replicate.Client(api_token=REPLICATE_API_TOKEN)
	        with open(temp_audio_file, "rb") as audio_f:
	            # Model options must live inside `input`; as extra keyword
	            # arguments to run() they never reached the model.
	            # NOTE(review): these values now take effect for the first
	            # time - confirm batch_size=512 is acceptable for the model.
	            output = client.run(
	                "victor-upmeet/whisperx:84d2ad2d6194fe98a17d2b60bef1c7f910c46b2f6fd38996ca457afd9c8abfcb",
	                input={
	                    "audio_file": audio_f,
	                    "language": "en",
	                    "batch_size": 512,
	                    "align_output": False,
	                    "diarization": False,
	                },
	            )
	    finally:
	        if temp_audio_file and os.path.exists(temp_audio_file):
	            os.remove(temp_audio_file)

	    return _transcript_from_output(output)

	# Build the Gradio interface: a Blocks app using the "monochrome" theme and
	# custom CSS for a centered container and a centered button row.
	# NOTE(review): block indentation is missing in this copy of the file, so the
	# nesting of the `with` layout containers below cannot be read off the
	# source; restore it before running.
	with gr.Blocks(theme="monochrome", css="""
	.centered-container {
	width: 80vw;
	min-width: 400px;
	max-width: 1400px;
	margin-left: auto !important;
	margin-right: auto !important;
	margin-top: 2.5em;
	margin-bottom: 2.5em;
	background: var(--block-background-fill);
	border-radius: 1.2em;
	box-shadow: 0 0 16px 0 #0001;
	padding: 2em 2em 2em 2em;
	}
	@media (max-width: 900px) {
	.centered-container {
	width: 98vw;
	padding: 1em 0.5em 1em 0.5em;
	}
	}
	.transcribe-btn-center {
	display: flex;
	justify-content: center;
	margin-top: 1em;
	}
	""") as demo:
	# Outer column styled by the .centered-container CSS class.
	with gr.Column(elem_classes="centered-container"):
	gr.Markdown("# Automatic Video Transcriber", elem_id="title")
	gr.Markdown("## Upload a video file and click 'Transcribe' to begin.", elem_id="subtitle")
	with gr.Row():
	with gr.Column(scale=1, min_width=320):
	# Video input restricted to file uploads.
	video_input = gr.Video(
	label="Input Video File (.mp4)",
	interactive=True,
	sources=["upload"],
	)
	# Row styled by the .transcribe-btn-center CSS class, holding the button.
	with gr.Row(elem_classes="transcribe-btn-center"):
	transcribe_btn = gr.Button("Transcribe", scale=0)
	gr.Markdown("### Please note that file uploads may take a few minutes to process due to network rate limits. A local version of this app is available [here](https://github.com/sam-mata/video-transcriber).", elem_id="note")
	with gr.Column(scale=1, min_width=320):
	# Non-editable textbox with a copy button; receives the click handler's result.
	text_output = gr.Textbox(
	label="Raw Text Output",
	show_copy_button=True,
	lines=14,
	interactive=False,
	)
	# On click, call process_video with the video input's value and write the
	# returned string into the output textbox.
	transcribe_btn.click(
	fn=process_video,
	inputs=video_input,
	outputs=text_output
	)

	# Start the app, passing a 200MB max_file_size to launch().
	demo.launch(max_file_size="200MB")