File size: 1,858 Bytes
79bdf4c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import os
import whisper
import gradio as gr
from groq import Groq
from TTS.api import TTS

# ✅ Load OpenAI Whisper model
# Created once at import time; transcribe_audio() reuses this single instance
# for every request. "base" is the model name passed to whisper.load_model.
whisper_model = whisper.load_model("base")

# ✅ Function to Transcribe Speech to Text
def transcribe_audio(audio):
    """Transcribe speech to text with the module-level Whisper model.

    Args:
        audio: Audio input forwarded unchanged to ``whisper_model.transcribe``
            (the Gradio UI in this file supplies a file path).

    Returns:
        The ``"text"`` entry of the transcription result.
    """
    # Progress message; the emoji was previously stored as mojibake.
    print("📝 Transcribing...")
    result = whisper_model.transcribe(audio)
    return result["text"]

# ✅ Function to Get Response from Groq API
def get_groq_response(prompt):
    """Send ``prompt`` to Groq as a single user message and return the reply.

    A new client is built on each call using the ``GROQ_API_KEY``
    environment variable.

    Args:
        prompt: Text sent as the content of the user message.

    Returns:
        The message content of the first choice in the chat completion.

    Raises:
        KeyError: If ``GROQ_API_KEY`` is not set in the environment.
    """
    # Progress message; the emoji was previously stored as mojibake.
    print("🤖 Generating Response...")
    client = Groq(api_key=os.environ["GROQ_API_KEY"])
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama-3.3-70b-versatile",
    )
    return chat_completion.choices[0].message.content

# ✅ Function to Convert Text to Speech using Coqui TTS
# Lazily created Coqui TTS model, shared by all calls to text_to_speech() so
# the model is not reloaded on every request.
_TTS_MODEL = None


def text_to_speech(text):
    """Synthesize ``text`` to a WAV file with Coqui TTS and return its path.

    The TTS model is loaded on first use and then reused. Each call writes to
    its own uniquely named temporary ``.wav`` file, so overlapping requests
    cannot overwrite each other's audio. The file is not deleted here; the
    caller owns it once the path is returned.

    Args:
        text: The text to speak.

    Returns:
        Path of the generated WAV file.
    """
    import tempfile

    global _TTS_MODEL

    # Progress message; the emoji was previously stored as mojibake.
    print("🔊 Generating Speech...")
    if _TTS_MODEL is None:
        _TTS_MODEL = TTS(model_name="tts_models/en/ljspeech/glow-tts")

    # Reserve a unique path, then close the handle so the TTS library can
    # open and write the file itself (delete=False keeps it on disk).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_file = tmp.name

    _TTS_MODEL.tts_to_file(text=text, file_path=output_file)
    return output_file

# ✅ Gradio Interface Function
def chatbot_pipeline(audio):
    """Run the full voice round-trip: speech -> text -> LLM reply -> speech.

    Args:
        audio: Audio input from the UI, or ``None`` when the user submitted
            without recording anything.

    Returns:
        A 3-tuple ``(transcribed_text, response_text, speech_file)``.
        ``speech_file`` is ``None`` when there was nothing to answer (no
        audio, or an empty transcript); ``response_text`` then carries a
        short hint for the user instead of a model reply.
    """
    # Submitting without a recording hands us None; passing that on would
    # fail inside the transcription step with an unhelpful error.
    if audio is None:
        return (
            "",
            "No audio was received. Please record a message and try again.",
            None,
        )

    # Step 1: Convert Speech to Text
    text = transcribe_audio(audio)

    # An empty transcript would only send an empty prompt to the LLM and
    # then synthesize speech for its answer, so stop here.
    if not text or not text.strip():
        return text, "No speech was detected. Please try again.", None

    # Step 2: Get Response from Groq API
    response = get_groq_response(text)

    # Step 3: Convert Text Response to Speech
    speech_file = text_to_speech(response)

    return text, response, speech_file

# ✅ Build Gradio UI
# One audio input wired to chatbot_pipeline; its three return values fill, in
# order, the transcript box, the reply box, and the audio player. The title and
# description emoji were previously stored as mojibake and are repaired here.
interface = gr.Interface(
    fn=chatbot_pipeline,
    inputs=gr.Audio(type="filepath"),  # Mic input, delivered as a file path
    outputs=[
        gr.Textbox(label="Transcribed Text"),
        gr.Textbox(label="Chatbot Response"),
        gr.Audio(label="Generated Speech"),
    ],
    title="🗣️ Speech-to-Text AI Chatbot",
    description="🎙️ Speak into the microphone → Get AI response → Listen to the reply!",
)

# ✅ Launch Gradio UI
# Runs at import time (no __main__ guard), so importing this module starts the
# server. NOTE(review): share=True requests a public share link from Gradio;
# confirm that is intended wherever this is deployed.
interface.launch(share=True)