# voice_to_text / app.py
# Talha812's picture
# Create app.py
# 4f0d39e verified
import os
import tempfile

import gradio as gr
import torch
import whisper
from groq import Groq
from TTS.api import TTS
# Load models once at import time so every request reuses the same instances.
whisper_model = whisper.load_model("base")  # speech-to-text
tts_model = TTS(
    model_name="tts_models/en/ljspeech/tacotron2-DDC",
    progress_bar=False,
    gpu=torch.cuda.is_available(),  # run TTS on the GPU only when one is present
)
# SECURITY: the API key used to be hard-coded on this line. Any key that was
# committed to the repository must be treated as leaked and revoked. The key is
# now read from the environment (e.g. a Space/host secret); a missing variable
# fails fast with a KeyError naming GROQ_API_KEY.
groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])
# Core pipeline
def voice_to_voice(audio):
    """Answer a spoken question with text and a synthesized voice reply.

    Pipeline: Whisper transcribes the recording, the transcription is sent to
    Groq as a single user message, and the reply is rendered to a WAV file by
    the TTS model.

    Args:
        audio: Path to the recorded audio file (the Gradio input is configured
            with ``type="filepath"``), or ``None`` when nothing was recorded.

    Returns:
        A ``(reply_text, reply_audio_path)`` tuple. ``reply_audio_path`` is
        ``None`` whenever there is nothing to speak (no recording, no
        recognizable speech, or an empty model reply).
    """
    # Submitting without recording yields None; bail out before touching Whisper.
    if audio is None:
        return "No audio received. Please record a question and try again.", None

    # Transcribe
    transcription = whisper_model.transcribe(audio)["text"].strip()
    if not transcription:
        # Nothing intelligible was recognized; do not send an empty prompt.
        return "Sorry, I could not hear any speech in that recording.", None

    # Generate response from Groq
    response = groq_client.chat.completions.create(
        messages=[{"role": "user", "content": transcription}],
        model="llama3-70b-8192",
    )
    groq_text = response.choices[0].message.content or ""
    if not groq_text.strip():
        # Nothing to synthesize; return the (empty) text without audio.
        return groq_text, None

    # Convert to speech. Each request gets its own file: a fixed
    # "response.wav" would be overwritten by concurrent requests, so one
    # user could receive another user's reply.
    # NOTE: the file is intentionally not deleted here because it must still
    # exist when Gradio reads it after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    tts_model.tts_to_file(text=groq_text, file_path=output_path)
    return groq_text, output_path
# Gradio Interface: one microphone input, a text answer and a spoken answer.
# The emoji in the labels/title were previously mojibake (UTF-8 bytes decoded
# with a legacy code page) and are restored here.
# NOTE(review): `source="microphone"` looks like the Gradio 3.x spelling of
# this argument — confirm against the pinned Gradio version before upgrading.
iface = gr.Interface(
    fn=voice_to_voice,
    inputs=gr.Audio(source="microphone", type="filepath", label="🎙️ Speak your question"),
    outputs=[
        gr.Textbox(label="📝 Groq Answer"),
        gr.Audio(label="🎧 AI Voice Reply"),
    ],
    title="🎤 Voice Chat with Groq (LLaMA 3)",
    description="Speak your question. Whisper will transcribe it, Groq will answer it, and TTS will reply in voice.",
    live=False,  # run only on explicit submit, not on every input change
)
iface.launch()