salomonsky committed on
Commit
afd9000
verified
1 Parent(s): 7063fa2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torch
3
+ from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
4
+ from gtts import gTTS
5
+ import numpy as np
6
+ import sounddevice as sd
7
+
8
class VoiceRecognition:
    """Record microphone audio, transcribe it with Wav2Vec2 and speak it back.

    The pipeline is: ``listen`` (capture from the default input device) ->
    ``vad`` (drop silent stretches) -> ``transcribe`` (CTC greedy decoding) ->
    ``text_to_speech`` (gTTS MP3 synthesis in Spanish).
    """

    def __init__(self, model_name="facebook/wav2vec2-large-xlsr-53-spanish"):
        """Load the speech-recognition processor and model.

        Args:
            model_name: Hugging Face Hub id (or local path) of a Wav2Vec2 CTC
                checkpoint. The default is a Spanish checkpoint, matching the
                Spanish UI and the ``lang='es'`` used for synthesis. A
                text-generation checkpoint cannot be loaded here: the
                processor needs a feature extractor plus a CTC tokenizer.
        """
        self.processor = Wav2Vec2Processor.from_pretrained(model_name)
        self.model = Wav2Vec2ForCTC.from_pretrained(model_name)
        # Sample rate used both for recording and for feature extraction.
        self.sample_rate = 16000

    def listen(self, duration=5):
        """Record mono float32 audio from the default input device.

        Args:
            duration: Recording length in seconds.

        Returns:
            A 1-D float32 NumPy array holding the recorded samples.
        """
        st.write("Escuchando...")
        num_frames = int(self.sample_rate * duration)
        audio_data = sd.rec(
            num_frames,
            samplerate=self.sample_rate,
            channels=1,
            dtype='float32',
        )
        sd.wait()  # Block until the recording buffer has been filled.
        st.write("Grabación terminada.")
        return audio_data.flatten()

    def vad(self, audio, threshold=0.02, frame_ms=30):
        """Keep only the frames of ``audio`` that contain signal above a threshold.

        The signal is split into fixed-length frames and a frame is kept in
        full when its peak absolute amplitude exceeds ``threshold``. Gating
        whole frames (rather than individual samples) preserves the waveform
        inside speech, including the low-amplitude samples around every zero
        crossing.

        Args:
            audio: Array-like of audio samples; flattened to 1-D.
            threshold: Peak absolute amplitude a frame must exceed to be kept.
            frame_ms: Frame length in milliseconds.

        Returns:
            A 1-D array with the samples of all kept frames, in order. It is
            empty when no frame exceeds the threshold.
        """
        samples = np.asarray(audio).reshape(-1)
        if samples.size == 0:
            return samples

        frame_len = max(1, int(self.sample_rate * frame_ms / 1000))
        num_frames = -(-samples.size // frame_len)  # ceiling division

        # Zero-pad so the last, possibly partial, frame can be reshaped too.
        padded = np.zeros(num_frames * frame_len, dtype=samples.dtype)
        padded[:samples.size] = samples
        frame_peaks = np.abs(padded.reshape(num_frames, frame_len)).max(axis=1)

        # Expand the per-frame decision back to a per-sample mask.
        keep = np.repeat(frame_peaks > threshold, frame_len)[:samples.size]
        return samples[keep]

    def transcribe(self, audio):
        """Transcribe audio samples to text with greedy CTC decoding.

        Args:
            audio: 1-D array of samples recorded at ``self.sample_rate``.

        Returns:
            The decoded transcription, or an empty string for empty input.
        """
        if np.size(audio) == 0:
            return ""

        input_values = self.processor(
            audio,
            return_tensors="pt",
            sampling_rate=self.sample_rate,
        ).input_values
        with torch.no_grad():
            logits = self.model(input_values).logits
        # Greedy decoding: most likely token at each time step.
        predicted_ids = torch.argmax(logits, dim=-1)
        return self.processor.decode(predicted_ids[0])

    def text_to_speech(self, text, output_path="response.mp3"):
        """Synthesize ``text`` as Spanish speech and save it as an MP3 file.

        Args:
            text: Text to speak; must contain non-whitespace characters.
            output_path: Destination file for the generated MP3.

        Returns:
            The path of the written MP3 file.

        Raises:
            ValueError: If ``text`` is empty or only whitespace.
        """
        if not text or not text.strip():
            raise ValueError("text_to_speech requires non-empty text")
        tts = gTTS(text=text, lang='es')
        tts.save(output_path)
        return output_path
37
+
38
@st.cache_resource
def _load_recognizer():
    """Build the recognizer once and reuse it across Streamlit reruns."""
    return VoiceRecognition()


def main():
    """Render the voice-assistant page: record, transcribe and speak back.

    Streamlit re-executes the script on every interaction, so the recognizer
    is obtained from a cached loader instead of being rebuilt (and its model
    reloaded) on each button click.
    """
    st.title("Asistente de Voz - Reconocimiento de Voz")
    recognizer = _load_recognizer()

    if not st.button("Iniciar Grabación"):
        return

    audio = recognizer.listen()
    audio_vad = recognizer.vad(audio)
    if audio_vad.size == 0:
        st.write("No se detectó actividad de voz.")
        return

    transcription = recognizer.transcribe(audio_vad)
    if not transcription.strip():
        # gTTS rejects empty text, so stop before attempting synthesis.
        st.write("No se pudo transcribir el audio.")
        return

    st.write(f"Texto transcrito: {transcription}")
    audio_path = recognizer.text_to_speech(transcription)
    st.audio(audio_path)


if __name__ == "__main__":
    main()