# filename: pro_arabic_transcriper.py
#
# Streamlit app: upload an audio or video file, convert it to 16 kHz mono WAV
# with FFmpeg, and transcribe it with a pretrained NeMo ASR model.
#
# NOTE: Streamlit re-executes this whole script on every widget interaction,
# so every temporary file created during a run is removed before the run ends.
import html
import os
import subprocess
import tempfile
import time
from pathlib import Path

import ffmpeg
import magic  # for file type detection
import nemo.collections.asr as nemo_asr
import soundfile as sf
import streamlit as st

# Custom CSS for gloomy elegant styling
st.markdown(""" """, unsafe_allow_html=True)

# Help text shown when the ffmpeg executable cannot be run.
FFMPEG_INSTALL_HELP = """
### How to install FFmpeg:

**Windows (using Chocolatey):**
```
choco install ffmpeg
```

**Windows (manual):**
1. Download from [ffmpeg.org](https://ffmpeg.org/download.html)
2. Extract and add the bin folder to your system PATH

**After installing**, restart this application.
"""


# Check if ffmpeg is available
def check_ffmpeg():
    """Return True if `ffmpeg -version` runs successfully, False otherwise."""
    try:
        subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
        return True
    except (subprocess.SubprocessError, FileNotFoundError):
        return False


if not check_ffmpeg():
    st.error("FFmpeg is not installed or not found in PATH. Please install FFmpeg to use this application.")
    st.markdown(FFMPEG_INSTALL_HELP)
    st.stop()

# Accept any file - we'll detect type server-side
AUDIO_MIMETYPES = {
    'audio/wav', 'audio/x-wav', 'audio/mpeg', 'audio/ogg', 'audio/flac',
    'audio/x-m4a', 'audio/aac', 'audio/x-ms-wma',
}
VIDEO_MIMETYPES = {
    'video/mp4', 'video/quicktime', 'video/x-matroska', 'video/x-msvideo',
    'video/webm', 'video/x-ms-wmv',
}


# Load NeMo model once
@st.cache_resource
def load_model():
    """Load the pretrained NeMo ASR model (cached by Streamlit across reruns).

    Returns:
        The model returned by ``EncDecHybridRNNTCTCBPEModel.from_pretrained``.

    Raises:
        RuntimeError: If loading fails; the original exception is chained.
    """
    try:
        return nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained(
            model_name="nvidia/stt_ar_fastconformer_hybrid_large_pcd_v1.0"
        )
    except Exception as e:
        # Re-raise so the UI can present a friendly error when called
        raise RuntimeError(f"Failed to load NeMo model: {e}") from e


# Present a load failure as a UI error instead of a raw traceback.
try:
    model = load_model()
except RuntimeError as e:
    st.error(str(e))
    st.stop()


def detect_file_type(file_data):
    """Detect the MIME type of a file using python-magic"""
    return magic.from_buffer(file_data, mime=True)


def _remove_quietly(path):
    """Best-effort delete of a temporary file; a missing path or OS error is ignored."""
    if not path:
        return
    try:
        os.remove(path)
    except OSError:
        pass


def convert_audio(uploaded_file, target_sample_rate=16000):
    """
    Convert any audio or video file to a mono WAV using FFmpeg.

    Args:
        uploaded_file: A Streamlit UploadedFile (any object with ``read`` and
            ``seek``) or a path-like object.
        target_sample_rate: Output sample rate (default 16000 Hz).

    Returns:
        str: Path to the converted temporary WAV file. The caller is
        responsible for deleting it.

    Raises:
        RuntimeError: If reading, type detection or the FFmpeg conversion
            fails. No temporary files are left behind in that case.
    """
    tmp_in_path = None
    output_path = None
    try:
        # Read the file data once; it is reused for detection and for the temp copy.
        if hasattr(uploaded_file, 'read'):
            uploaded_file.seek(0)
            file_data = uploaded_file.read()
            uploaded_file.seek(0)  # Reset position for later use
        else:
            with open(uploaded_file, 'rb') as f:
                file_data = f.read()

        # Detect file type
        mime_type = detect_file_type(file_data)

        # Save to temporary input file
        suffix = '.tmp'
        if mime_type in AUDIO_MIMETYPES:
            suffix = '.audio' + suffix
        elif mime_type in VIDEO_MIMETYPES:
            suffix = '.video' + suffix

        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_in:
            tmp_in_path = tmp_in.name
            tmp_in.write(file_data)

        # Create the output file atomically (tempfile.mktemp is deprecated and
        # race-prone) and close the handle so FFmpeg can overwrite it.
        fd, output_path = tempfile.mkstemp(suffix='.wav')
        os.close(fd)

        # Build the ffmpeg conversion pipeline
        stream = ffmpeg.input(tmp_in_path)

        # Extract audio from video if needed
        if mime_type in VIDEO_MIMETYPES:
            stream = stream.audio

        # Convert to mono WAV at the requested sample rate
        stream = ffmpeg.output(
            stream,
            output_path,
            acodec='pcm_s16le',      # 16-bit PCM
            ac=1,                    # mono
            ar=target_sample_rate,   # sample rate
            loglevel='error',        # reduce ffmpeg output
        )

        try:
            # stderr must be captured, otherwise ffmpeg.Error.stderr is None
            # and the failure reason cannot be reported.
            ffmpeg.run(stream, overwrite_output=True, capture_stderr=True)
        except ffmpeg.Error as e:
            detail = e.stderr.decode(errors='replace') if e.stderr else str(e)
            raise RuntimeError(f"FFmpeg error during conversion: {detail}") from e

        return output_path
    except Exception as e:
        # Do not leak a partial/empty output file on failure.
        _remove_quietly(output_path)
        raise RuntimeError(f"Failed to convert file to WAV: {str(e)}") from e
    finally:
        # Clean up input temp file
        _remove_quietly(tmp_in_path)


# App UI
st.markdown(""" """, unsafe_allow_html=True)

# Main content - single wide column layout
st.markdown("""

🔊 Supports many audio formats and common video types (MP4, MOV, MKV). Upload audio or video and the app will extract audio automatically.

⚡ Fast processing with advanced AI

""", unsafe_allow_html=True)

uploaded_file = st.file_uploader(
    "Drag and drop any audio or video file here",
    type=None,
    help="Supports any audio or video format that FFmpeg can handle",
)

if uploaded_file is not None:
    # Basic size check (Streamlit UploadedFile has .size in bytes)
    try:
        file_size_mb = uploaded_file.size / (1024 * 1024)
    except (AttributeError, TypeError):
        file_size_mb = None

    if file_size_mb is not None and file_size_mb > 500:
        st.warning("Large file detected (>500MB). Processing may take a long time or fail. Consider uploading a smaller file.")

    # Convert to 16kHz mono wav
    with st.spinner("Preparing audio for transcription..."):
        try:
            processed_wav = convert_audio(uploaded_file)
        except RuntimeError as e:
            st.error(str(e))
            st.stop()

    # The converted WAV is recreated on every rerun, so it must be removed on
    # every exit path (no click, success, failure), not only after transcription.
    try:
        # Read only the header: avoids loading the whole file into memory.
        wav_info = sf.info(processed_wav)
        sample_rate = wav_info.samplerate
        channels = wav_info.channels
        duration = wav_info.frames / sample_rate

        # Show audio player and info
        st.audio(processed_wav, format="audio/wav")
        st.markdown("### Audio Details")
        st.markdown("""

Duration

{:.1f}s

Sample Rate

{} Hz

Channels

{}

""".format(duration, sample_rate, channels), unsafe_allow_html=True)

        # Transcription
        if st.button("Transcribe Audio", type="primary"):
            # Create a progress container
            progress_container = st.empty()
            progress_container.markdown("""

Processing audio...

""", unsafe_allow_html=True)
            time.sleep(0.8)
            progress_container.markdown("""

Transcribing content...

""", unsafe_allow_html=True)

            # Actual transcription
            try:
                with st.spinner(""):
                    result = model.transcribe([processed_wav])
                transcript = result[0].text
            except Exception as e:
                st.error(f"Transcription failed: {e}")
                progress_container.empty()
                raise

            # Update progress to complete
            progress_container.markdown("""

Transcription complete

""", unsafe_allow_html=True)
            time.sleep(0.5)
            progress_container.empty()

            st.markdown("### Transcription Results")
            # Rendered with unsafe_allow_html, so escape the model output.
            st.markdown(f"""

{html.escape(transcript)}

""", unsafe_allow_html=True)

            # Download button (raw, unescaped text)
            st.download_button("Download Transcript", transcript, file_name="arabic_transcript.txt")
    finally:
        # Cleanup
        _remove_quietly(processed_wav)

# Minimal footer
st.markdown("---")
st.markdown("""

""", unsafe_allow_html=True)

# Arabic Transcriber Pro