"""Speech-to-speech chatbot: Whisper STT -> Groq LLM -> Coqui TTS, served with Gradio."""

import os

import whisper
import gradio as gr
from groq import Groq
from TTS.api import TTS

# ✅ Load OpenAI Whisper model once at startup (model load is expensive).
whisper_model = whisper.load_model("base")

# ✅ Cache for the Coqui TTS model — loaded lazily on first use, then reused.
# Previously a new TTS model was constructed on every request, which reloads
# the full model from disk each time.
_tts_model = None


def _get_tts():
    """Return the shared Coqui TTS model, loading it on first call."""
    global _tts_model
    if _tts_model is None:
        _tts_model = TTS(model_name="tts_models/en/ljspeech/glow-tts")
    return _tts_model


# ✅ Function to Transcribe Speech to Text
def transcribe_audio(audio):
    """Transcribe the audio file at path *audio* to text with Whisper.

    Args:
        audio: Filesystem path to the recorded audio clip.

    Returns:
        The transcribed text string.
    """
    print("📝 Transcribing...")
    result = whisper_model.transcribe(audio)
    return result["text"]


# ✅ Function to Get Response from Groq API
def get_groq_response(prompt):
    """Send *prompt* to the Groq chat-completions API and return the reply text.

    Raises:
        RuntimeError: If the GROQ_API_KEY environment variable is not set.
    """
    print("🤖 Generating Response...")
    # Fail with an actionable message instead of a bare KeyError.
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError(
            "GROQ_API_KEY environment variable is not set; "
            "export it before launching the app."
        )
    client = Groq(api_key=api_key)
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama-3.3-70b-versatile",
    )
    return chat_completion.choices[0].message.content


# ✅ Function to Convert Text to Speech using Coqui TTS
def text_to_speech(text):
    """Synthesize *text* to a WAV file and return its path."""
    print("🔊 Generating Speech...")
    output_file = "response.wav"
    _get_tts().tts_to_file(text=text, file_path=output_file)
    return output_file


# ✅ Gradio Interface Function
def chatbot_pipeline(audio):
    """Full pipeline: speech -> text -> LLM response -> synthesized speech.

    Args:
        audio: Path to the recorded audio, or None if nothing was recorded.

    Returns:
        Tuple of (transcribed text, chatbot response text, path to speech WAV).
    """
    # Guard: Gradio passes None when the user submits without recording.
    if audio is None:
        return "", "No audio received — please record something first.", None

    # Step 1: Convert Speech to Text
    text = transcribe_audio(audio)

    # Step 2: Get Response from Groq API
    response = get_groq_response(text)

    # Step 3: Convert Text Response to Speech
    speech_file = text_to_speech(response)

    return text, response, speech_file


# ✅ Build Gradio UI
interface = gr.Interface(
    fn=chatbot_pipeline,
    inputs=gr.Audio(type="filepath"),  # Mic input delivered as a file path
    outputs=[
        gr.Textbox(label="Transcribed Text"),
        gr.Textbox(label="Chatbot Response"),
        gr.Audio(label="Generated Speech"),
    ],
    title="🗣️ Speech-to-Text AI Chatbot",
    description="🎙️ Speak into the microphone → Get AI response → Listen to the reply!",
)

# ✅ Launch Gradio UI only when run as a script (not on import).
if __name__ == "__main__":
    interface.launch(share=True)