import os
import whisper
import gradio as gr
from groq import Groq
from TTS.api import TTS
# Load OpenAI Whisper model
whisper_model = whisper.load_model("base")
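# Note: "base" is the second-smallest standard Whisper checkpoint (after
# "tiny"); swapping in "small" or "medium" improves accuracy at the cost of
# memory and load time (an optional tweak, not part of the original app).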
# Function to Transcribe Speech to Text
def transcribe_audio(audio):
    print("Transcribing...")
    result = whisper_model.transcribe(audio)
    return result["text"]
# Function to Get Response from Groq API
def get_groq_response(prompt):
    print("Generating Response...")
    client = Groq(api_key=os.environ["GROQ_API_KEY"])
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama-3.3-70b-versatile"
    )
    return chat_completion.choices[0].message.content
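# GROQ_API_KEY must be set in the environment (e.g. as a Space secret);
# otherwise the os.environ lookup raises a bare KeyError. A slightly more
# defensive variant (an optional sketch, not the original code):
#
#     api_key = os.environ.get("GROQ_API_KEY")
#     if not api_key:
#         raise RuntimeError("Set the GROQ_API_KEY environment variable")
#     client = Groq(api_key=api_key)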
# Function to Convert Text to Speech using Coqui TTS
def text_to_speech(text):
    print("Generating Speech...")
    tts = TTS(model_name="tts_models/en/ljspeech/glow-tts")
    output_file = "response.wav"
    tts.tts_to_file(text=text, file_path=output_file)
    return output_file
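# Note: the Coqui TTS model is re-instantiated on every request. Loading it
# once at import time, like the Whisper model above, would cut per-request
# latency (a hedged sketch, assuming the same checkpoint):
#
#     tts = TTS(model_name="tts_models/en/ljspeech/glow-tts")
#
#     def text_to_speech(text):
#         tts.tts_to_file(text=text, file_path="response.wav")
#         return "response.wav"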
# Gradio Interface Function
def chatbot_pipeline(audio):
    # Step 1: Convert Speech to Text
    text = transcribe_audio(audio)
    # Step 2: Get Response from Groq API
    response = get_groq_response(text)
    # Step 3: Convert Text Response to Speech
    speech_file = text_to_speech(response)
    return text, response, speech_file
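# The three return values map positionally onto the three output components
# declared in the interface below.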
# Build Gradio UI
interface = gr.Interface(
    fn=chatbot_pipeline,
    inputs=gr.Audio(type="filepath"),  # Mic input
    outputs=[
        gr.Textbox(label="Transcribed Text"),
        gr.Textbox(label="Chatbot Response"),
        gr.Audio(label="Generated Speech")
    ],
    title="🗣️ Speech-to-Text AI Chatbot",
    description="🎙️ Speak into the microphone → Get AI response → Listen to the reply!"
)
# Launch Gradio UI
interface.launch(share=True)
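# share=True asks Gradio for a temporary public *.gradio.live link; inside a
# Hugging Face Space this is redundant, since the Space is already served
# publicly.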