# Spaces:
# Runtime error
# Runtime error
import numpy as np
import sounddevice as sd
import streamlit as st
import torch
from gtts import gTTS
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
class VoiceRecognition:
    """Record microphone audio, transcribe it with Wav2Vec2 and speak it back.

    The pipeline is: ``listen`` (record) -> ``vad`` (trim silence) ->
    ``transcribe`` (speech-to-text) -> ``text_to_speech`` (gTTS MP3).
    """

    def __init__(self, model_name="facebook/wav2vec2-large-xlsr-53-spanish"):
        """Load the Wav2Vec2 processor and CTC model.

        Args:
            model_name: Hugging Face hub id (or local path) of a Wav2Vec2
                CTC checkpoint. The previous hard-coded value,
                "mistralai/Mixtral-8x7B-Instruct-v0.1", is a text LLM and
                cannot be loaded by the Wav2Vec2 classes.
        """
        self.processor = Wav2Vec2Processor.from_pretrained(model_name)
        self.model = Wav2Vec2ForCTC.from_pretrained(model_name)
        # Sampling rate used both for recording and for the processor.
        self.sample_rate = 16000

    def listen(self, duration=5):
        """Record ``duration`` seconds of mono audio and return it as 1-D float32.

        Recording goes through ``sounddevice``, i.e. the audio input of the
        machine running this script.
        """
        st.write("Escuchando...")
        audio_data = sd.rec(
            int(self.sample_rate * duration),
            samplerate=self.sample_rate,
            channels=1,
            dtype='float32',
        )
        sd.wait()  # Block until the recording buffer is full.
        st.write("Grabación terminada.")
        return audio_data.flatten()

    def vad(self, audio, threshold=0.02):
        """Trim leading and trailing silence from ``audio``.

        A sample counts as "voiced" when its absolute amplitude exceeds
        ``threshold``. The returned array spans the first to the last voiced
        sample, keeping everything in between so the waveform stays intact
        (dropping individual quiet samples would also remove every zero
        crossing inside speech and distort the signal).

        Returns:
            A 1-D array; empty when no sample exceeds ``threshold``.
        """
        samples = np.asarray(audio).ravel()
        voiced = np.flatnonzero(np.abs(samples) > threshold)
        if voiced.size == 0:
            return samples[:0]
        return samples[voiced[0]:voiced[-1] + 1]

    def transcribe(self, audio):
        """Return the text decoded from a 1-D waveform sampled at ``self.sample_rate``."""
        input_values = self.processor(
            audio, return_tensors="pt", sampling_rate=self.sample_rate
        ).input_values
        with torch.no_grad():  # Inference only; no gradients needed.
            logits = self.model(input_values).logits
        # Greedy CTC decoding: most likely token per frame.
        predicted_ids = torch.argmax(logits, dim=-1)
        return self.processor.decode(predicted_ids[0])

    def text_to_speech(self, text, lang='es', output_path="response.mp3"):
        """Synthesize ``text`` with gTTS, save it as MP3 and return the file path."""
        tts = gTTS(text=text, lang=lang)
        tts.save(output_path)
        return output_path
def main():
    """Render the Streamlit UI: record, trim silence, transcribe and speak back."""
    st.title("Asistente de Voz - Reconocimiento de Voz")
    # NOTE(review): Streamlit re-runs this script on every interaction, so the
    # model is re-loaded each time; consider caching the recognizer.
    recognizer = VoiceRecognition()

    if not st.button("Iniciar Grabación"):
        return

    audio = recognizer.listen()
    audio_vad = recognizer.vad(audio)
    if audio_vad.size == 0:
        st.write("No se detectó actividad de voz.")
        return

    transcription = recognizer.transcribe(audio_vad)
    st.write(f"Texto transcrito: {transcription}")

    # gTTS rejects empty text, so skip synthesis when nothing was recognized.
    if not transcription.strip():
        return

    audio_path = recognizer.text_to_speech(transcription)
    st.audio(audio_path)
# Script entry point: run the app only when executed directly, not on import.
if __name__ == "__main__":
    main()