Spaces:
Sleeping
Sleeping
import gradio as gr | |
import torch | |
import speech_recognition as sr | |
from pydub import AudioSegment | |
import os | |
# Constants
MAX_AUDIO_DURATION = 600  # in seconds; longer audio is truncated to this length before transcription
# --- Helper: Convert audio to wav --- | |
def convert_audio_to_wav(file_path):
    """Convert an audio file to WAV and return the path of the new file.

    The WAV file is written next to the input, using the same base name with
    a ``.wav`` extension.

    Args:
        file_path: Path to an audio file readable by ``AudioSegment.from_file``.

    Returns:
        Path of the exported WAV file.
    """
    audio = AudioSegment.from_file(file_path)
    # Swap only the trailing extension. A plain str.replace() on the extension
    # text would also rewrite matching substrings earlier in the path
    # (e.g. "/tmp/mp3/song.mp3" -> "/tmp/wav/song.wav") and would turn an
    # extension-less path into just "wav".
    base, _ext = os.path.splitext(file_path)
    wav_path = base + ".wav"
    audio.export(wav_path, format="wav")
    return wav_path
# --- Helper: Transcribe audio in chunks --- | |
def transcribe_audio_in_chunks(audio_path, chunk_duration=30):
    """Transcribe a WAV file by recognizing it in fixed-length chunks.

    Audio beyond ``MAX_AUDIO_DURATION`` seconds is dropped. Each chunk is
    written to a scratch WAV file and passed to ``recognize_google`` with
    language ``en-IN``. A chunk that cannot be understood, or whose request
    fails, contributes a bracketed placeholder instead of aborting the run.

    Args:
        audio_path: Path to the WAV file to transcribe.
        chunk_duration: Length of each chunk in seconds; must be positive.

    Returns:
        The per-chunk results joined with single spaces.

    Raises:
        ValueError: If ``chunk_duration`` is not a positive duration.
    """
    import tempfile  # local import: only this function needs it

    # Validate up front so a bad argument fails before any audio is loaded.
    chunk_ms = int(chunk_duration * 1000)
    if chunk_ms <= 0:
        raise ValueError("chunk_duration must be a positive number of seconds")

    recognizer = sr.Recognizer()
    audio = AudioSegment.from_wav(audio_path)
    max_ms = MAX_AUDIO_DURATION * 1000
    if len(audio) > max_ms:
        audio = audio[:max_ms]

    # A unique scratch file per call: a fixed name in the working directory
    # would be shared (and clobbered) by concurrent requests and left behind.
    fd, chunk_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    full_text = []
    try:
        for start in range(0, len(audio), chunk_ms):
            chunk = audio[start:start + chunk_ms]
            chunk.export(chunk_path, format="wav")
            with sr.AudioFile(chunk_path) as source:
                audio_data = recognizer.record(source)
            try:
                text = recognizer.recognize_google(audio_data, language="en-IN")
            except sr.UnknownValueError:
                full_text.append("[Unrecognized Audio]")
            except sr.RequestError as e:
                full_text.append(f"[Speech Error: {e}]")
            else:
                full_text.append(text)
    finally:
        # Always clean up the scratch file, even if export/recognition raised.
        os.remove(chunk_path)
    return " ".join(full_text)
# --- Main Function --- | |
def transcribe_audio(audio):
    """Transcribe the audio file at ``audio`` and return the text.

    Non-WAV input is converted to WAV first; the converted file is removed
    once transcription finishes.

    Args:
        audio: Path to the input audio file, or ``None``/empty when nothing
            was recorded or uploaded.

    Returns:
        The transcription, or an empty string when no audio was provided.
    """
    # The UI is configured with type="filepath"; a submit without audio
    # arrives as None, which would otherwise crash on .endswith().
    if not audio:
        return ""

    # Case-insensitive so "clip.WAV" is not needlessly re-encoded.
    if audio.lower().endswith(".wav"):
        return transcribe_audio_in_chunks(audio)

    wav_path = convert_audio_to_wav(audio)
    try:
        return transcribe_audio_in_chunks(wav_path)
    finally:
        # Best-effort cleanup of the intermediate WAV; never delete the
        # caller's own file and never mask the real result with a cleanup error.
        if wav_path != audio:
            try:
                os.remove(wav_path)
            except OSError:
                pass
# --- Gradio UI --- | |
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=[
        # type="filepath" hands transcribe_audio a path string, not raw samples.
        gr.Audio(sources=["microphone", "upload"], type="filepath", label="Input English Audio"),
    ],
    outputs=[
        gr.Textbox(label="Transcribed Text"),
    ],
    title="English Speech Recognition",
    # The arrow replaces a mis-decoded character ("β") in the scraped text.
    description="Upload or record English audio → Transcribe to text.",
    allow_flagging="never",
)
# Launched at import time, as in the original script.
iface.launch(debug=True, share=True)