Spaces:

ranialahassn
/

languagedetectorWhisper

Running

App Files Files Community

ranialahassn committed on Jun 5

Commit

8660e20

verified ·

1 Parent(s): 393e560

Create app.py

Browse files

Files changed (1) hide show

app.py +79 -0

app.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import gradio as gr
+import torch
+import torchaudio
+import numpy as np
+import librosa
+from transformers import WhisperProcessor, WhisperForConditionalGeneration
+from transformers import XLMRobertaTokenizerFast, XLMRobertaForSequenceClassification
+import soundfile as sf
+# --- Load models ---
+whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-base")
+whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")
+lang_tokenizer = XLMRobertaTokenizerFast.from_pretrained("papluca/xlm-roberta-base-language-detection")
+lang_model = XLMRobertaForSequenceClassification.from_pretrained("papluca/xlm-roberta-base-language-detection")
+# --- Convert audio to text ---
+def audio_to_text(audio_path):
+    audio_input, sample_rate = torchaudio.load(audio_path)
+    if sample_rate != 16000:
+        audio_input = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(audio_input)
+    input_features = whisper_processor(
+        audio_input.squeeze(), sampling_rate=16000, return_tensors="pt"
+    ).input_features
+    predicted_ids = whisper_model.generate(input_features)
+    transcription = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+    return transcription.strip()
+# --- Detect language from text ---
+def detect_language(text):
+    inputs = lang_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+    with torch.no_grad():
+        outputs = lang_model(**inputs)
+    probs = torch.nn.functional.softmax(outputs.logits, dim=1)
+    pred_idx = probs.argmax().item()
+    pred_label = lang_model.config.id2label[pred_idx]
+    confidence = probs[0][pred_idx].item() * 100
+    return f"🌐 Language: {pred_label} | Confidence: {confidence:.2f}%"
+# --- Gradio function ---
+def detect_language_from_audio(audio_file):
+    if audio_file is None:
+        return "❌ No file selected."
+    try:
+        # Save audio temporarily in WAV format if needed
+        temp_wav = "temp.wav"
+        data, sr = librosa.load(audio_file, sr=16000)
+        sf.write(temp_wav, data, sr)
+        # Step 1: Convert audio to text
+        text = audio_to_text(temp_wav)
+        if not text:
+            return "❌ Failed to extract text from audio."
+        # Step 2: Detect language
+        return detect_language(text)
+    except Exception as e:
+        return f"❌ Runtime error: {str(e)}"
+# --- Gradio Interface ---
+iface = gr.Interface(
+    fn=detect_language_from_audio,
+    inputs=gr.Audio(type="filepath", label="Choose Audio File (WAV/MP3)"),
+    outputs=gr.Textbox(label="Result"),
+    title="🎙️ Voice Language Detector",
+    description="Upload a voice file and the model will detect its language using Whisper + XLM-Roberta."
+)
+# --- Entry point ---
+if __name__ == "__main__":
+    iface.launch()