# Source captured from a Hugging Face Space (commit afd9000, file size 1,908 bytes).
# The Space status at capture time was "Runtime error".
import streamlit as st
import torch
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
from gtts import gTTS
import numpy as np
import sounddevice as sd
class VoiceRecognition:
    """Record audio, trim silence, transcribe with Wav2Vec2 CTC, and synthesize speech.

    The pipeline is: ``listen`` -> ``vad`` -> ``transcribe`` -> ``text_to_speech``.
    """

    # A Wav2Vec2 CTC checkpoint is required here: the processor/model classes
    # used below cannot load a text-generation checkpoint. Spanish is chosen to
    # match the Spanish UI strings and the ``lang='es'`` used for synthesis.
    DEFAULT_MODEL_NAME = "facebook/wav2vec2-large-xlsr-53-spanish"

    # Length of one voice-activity frame, in milliseconds.
    VAD_FRAME_MS = 20

    # Wav2Vec2's convolutional front end cannot produce an output frame from
    # fewer samples than its receptive field (400 samples for the standard
    # architecture); shorter clips are treated as "nothing to transcribe".
    MIN_TRANSCRIBE_SAMPLES = 400

    def __init__(self, model_name=DEFAULT_MODEL_NAME):
        """Load the processor and CTC model for ``model_name``.

        Args:
            model_name: Hub id or local path of a Wav2Vec2 CTC checkpoint.
        """
        self.processor = Wav2Vec2Processor.from_pretrained(model_name)
        self.model = Wav2Vec2ForCTC.from_pretrained(model_name)
        self.sample_rate = 16000

    def listen(self, duration=5):
        """Record ``duration`` seconds of mono audio and return it as a 1-D array.

        NOTE(review): ``sounddevice`` records from the input device of the
        machine running this script; on a hosted deployment that is the
        server, not the visitor's browser -- confirm a device is available.

        Args:
            duration: Recording length in seconds.

        Returns:
            A flattened float32 array of ``sample_rate * duration`` samples.
        """
        st.write("Escuchando...")
        audio_data = sd.rec(
            int(self.sample_rate * duration),
            samplerate=self.sample_rate,
            channels=1,
            dtype="float32",
        )
        sd.wait()  # block until the recording buffer is full
        st.write("Grabación terminada.")
        return audio_data.flatten()

    def vad(self, audio, threshold=0.02):
        """Keep only the frames of ``audio`` that contain signal above ``threshold``.

        Audio is split into ``VAD_FRAME_MS`` frames and a frame is kept whole
        when its peak absolute amplitude exceeds ``threshold``. Gating whole
        frames (instead of individual samples) preserves the waveform inside
        voiced regions; dropping single low-amplitude samples would also
        remove every zero crossing of the speech itself.

        Args:
            audio: 1-D (or flattenable) array of samples.
            threshold: Peak amplitude a frame must exceed to be kept.

        Returns:
            A 1-D array with the kept samples; empty when no sample exceeds
            ``threshold``.
        """
        samples = np.asarray(audio).reshape(-1)
        if samples.size == 0:
            return samples

        frame_len = max(1, self.sample_rate * self.VAD_FRAME_MS // 1000)
        n_frames = -(-samples.size // frame_len)  # ceiling division

        # Zero-pad the magnitudes so the last partial frame can be reshaped.
        magnitudes = np.abs(samples)
        padded = np.zeros(n_frames * frame_len, dtype=magnitudes.dtype)
        padded[: samples.size] = magnitudes
        frame_peaks = padded.reshape(n_frames, frame_len).max(axis=1)

        # Expand the per-frame decision back to a per-sample mask.
        keep = np.repeat(frame_peaks > threshold, frame_len)[: samples.size]
        return samples[keep]

    def transcribe(self, audio):
        """Return the greedy CTC transcription of ``audio``.

        Args:
            audio: 1-D array of samples at ``self.sample_rate``.

        Returns:
            The decoded text, or ``""`` when the clip is shorter than
            ``MIN_TRANSCRIBE_SAMPLES``.
        """
        samples = np.asarray(audio, dtype=np.float32).reshape(-1)
        if samples.size < self.MIN_TRANSCRIBE_SAMPLES:
            return ""

        input_values = self.processor(
            samples, return_tensors="pt", sampling_rate=self.sample_rate
        ).input_values
        with torch.no_grad():  # inference only; no gradients needed
            logits = self.model(input_values).logits
        predicted_ids = torch.argmax(logits, dim=-1)
        return self.processor.decode(predicted_ids[0])

    def text_to_speech(self, text, output_path="response.mp3"):
        """Synthesize ``text`` as Spanish speech and save it to ``output_path``.

        Args:
            text: Text to speak; gTTS rejects empty text.
            output_path: Destination file for the generated MP3.

        Returns:
            ``output_path``, for passing straight to an audio player.
        """
        tts = gTTS(text=text, lang="es")
        tts.save(output_path)
        return output_path
@st.cache_resource
def _load_recognizer():
    """Build the recognizer once and reuse it across Streamlit reruns."""
    return VoiceRecognition()


def main():
    """Render the voice-assistant page: record, transcribe, and speak back.

    Streamlit re-executes the script on every interaction, so the recognizer
    is obtained through a cached loader instead of being rebuilt (and its
    model reloaded) on each button click.
    """
    st.title("Asistente de Voz - Reconocimiento de Voz")
    recognizer = _load_recognizer()

    if not st.button("Iniciar Grabación"):
        return

    audio = recognizer.listen()
    audio_vad = recognizer.vad(audio)
    if audio_vad.size == 0:
        st.write("No se detectó actividad de voz.")
        return

    transcription = recognizer.transcribe(audio_vad)
    st.write(f"Texto transcrito: {transcription}")

    # gTTS refuses empty text, so skip synthesis when nothing was recognized.
    if not transcription.strip():
        return

    audio_path = recognizer.text_to_speech(transcription)
    st.audio(audio_path)


# Run the app when this file is executed as a script.
if __name__ == "__main__":
    main()
|