Spaces:
Running
Running
| import gradio as gr | |
| import replicate | |
| import os | |
| import tempfile | |
| from moviepy import VideoFileClip | |
| REPLICATE_API_TOKEN = os.getenv("REPLICATE_API_TOKEN") | |
def _extract_transcript(output):
    """Collapse a transcription result into a single plain-text string.

    Args:
        output: Either a dict (read for a "segments" list, falling back to a
            "text" entry) or a bare sequence of segment dicts, each carrying
            a "text" key.

    Returns:
        The segment texts joined by single spaces, the dict's "text" value
        when there are no segments, or "No transcription found." when
        nothing usable is present.
    """
    fallback = "No transcription found."
    if isinstance(output, dict):
        segments = output.get("segments")
        if not segments:
            return output.get("text", fallback)
    else:
        segments = output
    if not segments:
        # Empty or None non-dict result: there is no ".get" to fall back on.
        return fallback
    return " ".join(seg["text"] for seg in segments)


def process_video(video_file):
    """Transcribe the speech in a video file and return it as plain text.

    The audio track is written to a temporary .mp3 file, uploaded to the
    WhisperX model on Replicate, and the temporary file is removed again
    whether or not transcription succeeds.

    Args:
        video_file: Path to the uploaded video; falsy when nothing was
            uploaded.

    Returns:
        The transcript text, or a human-readable message when no file was
        given, the audio could not be extracted, or the result is empty.

    Raises:
        Exception: Errors raised by the Replicate call are not caught here
            and propagate to the caller.
    """
    if not video_file:
        return "No video file uploaded."

    temp_audio_file = None
    try:
        # Reserve a path only; the handle is closed right away so the audio
        # writer can open the file itself.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
            temp_audio_file = temp_audio.name

        try:
            # The context manager closes the clip's readers even when the
            # export fails part-way.
            with VideoFileClip(video_file) as video:
                if video.audio is None:
                    return "Failed to extract audio from video: the video has no audio track."
                video.audio.write_audiofile(temp_audio_file, logger=None)
        except Exception as e:
            return f"Failed to extract audio from video: {e}"

        # The token is a client setting, not an argument of run().
        client = replicate.Client(api_token=REPLICATE_API_TOKEN)
        with open(temp_audio_file, "rb") as audio_f:
            output = client.run(
                "victor-upmeet/whisperx:84d2ad2d6194fe98a17d2b60bef1c7f910c46b2f6fd38996ca457afd9c8abfcb",
                # Model options must live inside `input`; keyword arguments
                # given to run() itself are not forwarded as model inputs.
                input={
                    "audio_file": audio_f,
                    "language": "en",
                    "batch_size": 512,
                    "align_output": False,
                    "diarization": False,
                },
            )
    finally:
        if temp_audio_file and os.path.exists(temp_audio_file):
            os.remove(temp_audio_file)

    return _extract_transcript(output)
# Page styling: a centred card that narrows on small screens, plus a flex row
# used to centre the "Transcribe" button.
_APP_CSS = """
.centered-container {
width: 80vw;
min-width: 400px;
max-width: 1400px;
margin-left: auto !important;
margin-right: auto !important;
margin-top: 2.5em;
margin-bottom: 2.5em;
background: var(--block-background-fill);
border-radius: 1.2em;
box-shadow: 0 0 16px 0 #0001;
padding: 2em 2em 2em 2em;
}
@media (max-width: 900px) {
.centered-container {
width: 98vw;
padding: 1em 0.5em 1em 0.5em;
}
}
.transcribe-btn-center {
display: flex;
justify-content: center;
margin-top: 1em;
}
"""

# NOTE(review): indentation was not recoverable from the reviewed copy of this
# file; the container nesting below (upload, button and note in the left
# column, transcript in the right column) is reconstructed and should be
# confirmed against the running app.
with gr.Blocks(css=_APP_CSS, theme="monochrome") as demo:
    with gr.Column(elem_classes="centered-container"):
        gr.Markdown(value="# Automatic Video Transcriber", elem_id="title")
        gr.Markdown(
            value="## Upload a video file and click 'Transcribe' to begin.",
            elem_id="subtitle",
        )

        with gr.Row():
            # Left column: video upload, trigger button and usage note.
            with gr.Column(min_width=320, scale=1):
                video_input = gr.Video(
                    sources=["upload"],
                    interactive=True,
                    label="Input Video File (.mp4)",
                )
                with gr.Row(elem_classes="transcribe-btn-center"):
                    transcribe_btn = gr.Button(value="Transcribe", scale=0)
                gr.Markdown(
                    value=(
                        "### Please note that file uploads may take a few minutes "
                        "to process due to network rate limits. "
                        "A local version of this app is available "
                        "[here](https://github.com/sam-mata/video-transcriber)."
                    ),
                    elem_id="note",
                )

            # Right column: read-only transcript with a copy button.
            with gr.Column(min_width=320, scale=1):
                text_output = gr.Textbox(
                    interactive=False,
                    lines=14,
                    show_copy_button=True,
                    label="Raw Text Output",
                )

        # Clicking the button sends the uploaded video through process_video
        # and shows the returned text in the output box.
        transcribe_btn.click(
            fn=process_video,
            inputs=[video_input],
            outputs=[text_output],
        )

# Start the web server; uploads larger than 200MB are rejected.
demo.launch(max_file_size="200MB")