# filename: pro_arabic_transcriper.py import streamlit as st import nemo.collections.asr as nemo_asr import soundfile as sf import tempfile import os import time import magic # for file type detection import ffmpeg import subprocess from pathlib import Path # Custom CSS for gloomy elegant styling st.markdown(""" """, unsafe_allow_html=True) # Check if ffmpeg is available def check_ffmpeg(): try: subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True) return True except (subprocess.SubprocessError, FileNotFoundError): return False if not check_ffmpeg(): st.error("FFmpeg is not installed or not found in PATH. Please install FFmpeg to use this application.") st.markdown(""" ### How to install FFmpeg: **Windows (using Chocolatey):** ``` choco install ffmpeg ``` **Windows (manual):** 1. Download from [ffmpeg.org](https://ffmpeg.org/download.html) 2. Extract and add the bin folder to your system PATH **After installing**, restart this application. """) st.stop() # Accept any file - we'll detect type server-side AUDIO_MIMETYPES = { 'audio/wav', 'audio/x-wav', 'audio/mpeg', 'audio/ogg', 'audio/flac', 'audio/x-m4a', 'audio/aac', 'audio/x-ms-wma' } VIDEO_MIMETYPES = { 'video/mp4', 'video/quicktime', 'video/x-matroska', 'video/x-msvideo', 'video/webm', 'video/x-ms-wmv' } # Load NeMo model once @st.cache_resource def load_model(): try: model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained( model_name="nvidia/stt_ar_fastconformer_hybrid_large_pcd_v1.0" ) return model except Exception as e: # Re-raise so the UI can present a friendly error when called raise RuntimeError(f"Failed to load NeMo model: {e}") model = load_model() def detect_file_type(file_data): """Detect the MIME type of a file using python-magic""" mime = magic.from_buffer(file_data, mime=True) return mime def convert_audio(uploaded_file, target_sample_rate=16000): """ Convert any audio or video file to a 16kHz mono WAV using FFmpeg. Returns the path to the converted temporary WAV file. Args: uploaded_file: A Streamlit UploadedFile or path-like object target_sample_rate: Output sample rate (default 16000 Hz) Returns: str: Path to the converted temporary WAV file """ try: # Read the file data if hasattr(uploaded_file, 'read'): file_data = uploaded_file.read() uploaded_file.seek(0) # Reset position for later use else: with open(uploaded_file, 'rb') as f: file_data = f.read() # Detect file type mime_type = detect_file_type(file_data) # Save to temporary input file suffix = '.tmp' if mime_type in AUDIO_MIMETYPES: suffix = '.audio' + suffix elif mime_type in VIDEO_MIMETYPES: suffix = '.video' + suffix with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_in: if hasattr(uploaded_file, 'read'): uploaded_file.seek(0) tmp_in.write(uploaded_file.read()) else: tmp_in.write(file_data) tmp_in_path = tmp_in.name # Create output WAV file output_path = tempfile.mktemp(suffix='.wav') try: # Build the ffmpeg conversion pipeline stream = ffmpeg.input(tmp_in_path) # Extract audio from video if needed if mime_type in VIDEO_MIMETYPES: stream = stream.audio # Convert to 16kHz mono WAV stream = ffmpeg.output( stream, output_path, acodec='pcm_s16le', # 16-bit PCM ac=1, # mono ar=target_sample_rate,# sample rate loglevel='error' # reduce ffmpeg output ) # Run the conversion ffmpeg.run(stream, overwrite_output=True) return output_path except ffmpeg.Error as e: raise RuntimeError(f"FFmpeg error during conversion: {e.stderr.decode()}") finally: # Clean up input temp file try: os.remove(tmp_in_path) except Exception: pass except Exception as e: raise RuntimeError(f"Failed to convert file to WAV: {str(e)}") # App UI st.markdown("""
Convert speech to text with the highest accuracy
Powered by NeMo ASR and Streamlit | Professional Arabic Transcription Service
©YahyaAlnwsany | 2025 Arabic Transcriber Pro | All rights reserved