"""Gradio app: Kannada speech in, English speech out.

Pipeline for each recorded or uploaded clip:
    1. Speech-to-text (Kannada) via Google Speech Recognition.
    2. Kannada -> English text translation via GoogleTranslator.
    3. English text-to-speech via a custom Flask server reached over ngrok.
"""

import base64  # Needed to decode the audio
import os
import tempfile
import time
from typing import Optional

import gradio as gr
import requests  # Needed for the new API
import speech_recognition as sr
from deep_translator import GoogleTranslator

# --- CONFIGURATION ---
# 🔴 PASTE YOUR NGROK URL HERE (Just the domain, no /v1 or /generate_audio)
# Example: "https://keen-marten-tops.ngrok-free.app"
NGROK_BASE_URL = "https://keen-marten-tops.ngrok-free.app"

# (connect, read) timeouts in seconds for the TTS request. Without a timeout,
# a stalled tunnel or server would block the UI callback forever.
TTS_REQUEST_TIMEOUT = (10, 120)

# Translation retry policy.
TRANSLATION_MAX_ATTEMPTS = 3
TRANSLATION_RETRY_DELAY_SECONDS = 2

r = sr.Recognizer()


def recognize_kannada(audio_path: str) -> str:
    """Convert an audio file to Kannada text.

    Args:
        audio_path: Path to an audio file readable by ``sr.AudioFile``.

    Returns:
        The recognized Kannada text, or the sentinel ``"(Nothing heard)"``
        when reading or recognition fails for any reason.
    """
    print("\n------------------------------------------------------")
    print(f"👂 Listening to audio file: {audio_path}")
    try:
        with sr.AudioFile(audio_path) as source:
            audio = r.record(source)
        # Using Google Speech Recognition
        text = r.recognize_google(audio, language="kn-IN")
    except Exception as e:
        # Best-effort boundary: any failure is reported as "nothing heard"
        # so the UI callback never crashes.
        print(f"❌ Recognition Error: {e}")
        return "(Nothing heard)"
    print(f"✅ KANNADA HEARD: {text}")
    return text


def translate_to_english(text: str) -> str:
    """Translate Kannada text to English, retrying on failure.

    Args:
        text: Kannada text, or a parenthesised sentinel such as
            ``"(Nothing heard)"`` produced by an earlier stage.

    Returns:
        The English translation; ``"…"`` when there is nothing to translate
        (empty input or a sentinel starting with ``"("``); or
        ``"(Translation Failed - Check Internet)"`` when every attempt fails.
    """
    # Sentinels from earlier stages all start with "(", so one check covers
    # "(Nothing heard)" as well.
    if not text or text.startswith("("):
        return "…"

    for attempt in range(1, TRANSLATION_MAX_ATTEMPTS + 1):
        try:
            translated = GoogleTranslator(source='kn', target='en').translate(text=text)
        except Exception as e:
            if attempt == TRANSLATION_MAX_ATTEMPTS:
                # No retry follows the last attempt, so do not sleep or
                # announce one.
                print(f"⚠️ Translation Attempt {attempt} failed: {e}")
                break
            print(
                f"⚠️ Translation Attempt {attempt} failed ({e}). "
                f"Retrying in {TRANSLATION_RETRY_DELAY_SECONDS} seconds..."
            )
            time.sleep(TRANSLATION_RETRY_DELAY_SECONDS)
        else:
            print(f"🇺🇸 ENGLISH TRANSLATION: {translated}")
            return translated

    print("❌ All attempts failed. Check internet.")
    return "(Translation Failed - Check Internet)"


def generate_english_speech(text: str) -> Optional[str]:
    """Convert English text to audio using the custom Flask server.

    Posts ``{"text": ..., "speaker_id": 0}`` to ``<NGROK_BASE_URL>/generate_audio``
    and expects a JSON object containing ``generated_audio_base64``.

    Args:
        text: English text to synthesize, or a sentinel from the translation
            stage (``"…"`` or a string starting with ``"(Translation Failed"``).

    Returns:
        Path to a temporary ``.wav`` file holding the decoded audio, or
        ``None`` when there is nothing to synthesize or any step fails.
        The temporary file is not deleted by this function.
    """
    if not text or text == "…" or text.startswith("(Translation Failed"):
        return None

    # Tolerate a trailing slash in the configured base URL.
    api_url = f"{NGROK_BASE_URL.rstrip('/')}/generate_audio"
    print(f"🗣️ Sending text to Server ({api_url}): '{text}'")

    # Payload matching your Flask server's expectation
    payload = {
        "text": text,
        "speaker_id": 0,  # Default speaker
    }

    try:
        response = requests.post(api_url, json=payload, timeout=TTS_REQUEST_TIMEOUT)
    except requests.RequestException as e:
        print(f"❌ Connection Error: {e}")
        return None

    if response.status_code != 200:
        print(f"❌ Server Error {response.status_code}: {response.text}")
        return None

    try:
        data = response.json()
    except ValueError as e:
        print(f"❌ Server Error: Response is not valid JSON: {e}")
        return None

    if not isinstance(data, dict) or 'generated_audio_base64' not in data:
        print(f"❌ Server Error: Key 'generated_audio_base64' missing. Response: {data}")
        return None

    try:
        # Decode the Base64 string back to binary audio
        audio_bytes = base64.b64decode(data['generated_audio_base64'])
    except (ValueError, TypeError) as e:
        # binascii.Error (invalid Base64) is a ValueError subclass.
        print(f"❌ Server Error: Could not decode audio: {e}")
        return None

    try:
        # Save to a temporary file; delete=False so the path stays valid
        # after the handle is closed and the UI can play it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            tmp.write(audio_bytes)
    except OSError as e:
        print(f"❌ Could not save audio file: {e}")
        return None

    print("✅ Audio Received and Decoded!")
    return tmp.name


def process_audio(audio_path, history):
    """Run the full pipeline for one clip and extend the chat history.

    Args:
        audio_path: Path of the recorded/uploaded audio, or a falsy value
            when no audio was provided.
        history: Current list of chat messages (dicts with ``role`` and
            ``content``); ``None`` is treated as an empty history.

    Returns:
        A ``(chatbot_messages, tts_path, state)`` tuple matching the three
        wired outputs. ``tts_path`` is ``None`` when no speech was generated.
    """
    if not audio_path:
        return history, None, history

    # Work on a copy so the caller's list is never mutated in place.
    history = list(history) if history else []

    # 1. Get Kannada Text
    kn_text = recognize_kannada(audio_path)

    # 2. Translate to English
    en_text = translate_to_english(kn_text)

    # 3. Generate Audio (Via Your Custom Flask API)
    tts_path = generate_english_speech(en_text)

    # 4. Update UI
    history.append({"role": "user", "content": {"path": audio_path}})
    if tts_path:
        history.append({"role": "assistant", "content": {"path": tts_path}})
    history.append({"role": "assistant", "content": f"🇺🇸 {en_text}\n(🇮🇳 {kn_text})"})

    return history, tts_path, history


# --- UI ---
with gr.Blocks(title="Kannada Voice Clone (Custom Server)", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🐘 Kannada Input ➡️ 🇺🇸 English Output (Custom CSM Model)")

    chatbot = gr.Chatbot(type="messages", height=450, label="Live Conversation")
    # Invisible autoplay player: receives the generated speech path so the
    # translation is spoken as soon as it arrives.
    hidden_player = gr.Audio(visible=False, autoplay=True)

    with gr.Tabs():
        with gr.TabItem("🎙️ Record Voice"):
            mic_input = gr.Audio(sources=["microphone"], type="filepath", show_download_button=False)
        with gr.TabItem("📂 Upload Audio File"):
            file_input = gr.Audio(sources=["upload"], type="filepath", show_download_button=True)

    clear_btn = gr.Button("Clear Chat")
    chat_history = gr.State([])

    # Logic Wiring
    mic_input.stop_recording(
        fn=process_audio,
        inputs=[mic_input, chat_history],
        outputs=[chatbot, hidden_player, chat_history],
    )
    file_input.upload(
        fn=process_audio,
        inputs=[file_input, chat_history],
        outputs=[chatbot, hidden_player, chat_history],
    )
    clear_btn.click(lambda: ([], None, []), None, [chatbot, hidden_player, chat_history])

if __name__ == "__main__":
    print(f"🚀 Starting App connected to: {NGROK_BASE_URL}")
    demo.launch()