ahmadalfakeh committed on
Commit
e2bfbb0
1 Parent(s): adebaac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -6
app.py CHANGED
@@ -2,14 +2,16 @@ import gradio as gr
2
  from huggingface_hub import InferenceClient
3
  import os
4
  from gtts import gTTS
5
- import speech_recognition as sr
6
- import pyaudio
7
  import io
8
  from tempfile import NamedTemporaryFile
9
 
10
  api = os.getenv("HF_API_TOKEN")
11
  client = InferenceClient("meta-llama/Meta-Llama-3.1-70B-Instruct", token=f"{api}")
12
 
 
 
 
13
  def respond(
14
  message,
15
  history: list[tuple[str, str]],
@@ -50,10 +52,14 @@ def text_to_speech(text):
50
  return f.read()
51
 
52
  def speech_to_text(audio):
53
- recognizer = sr.Recognizer()
54
- with sr.AudioFile(io.BytesIO(audio)) as source:
55
- audio_data = recognizer.record(source)
56
- return recognizer.recognize_google(audio_data)
 
 
 
 
57
 
58
  def process_audio(audio, system_message, max_tokens, temperature, top_p):
59
  text = speech_to_text(audio)
 
2
  from huggingface_hub import InferenceClient
3
  import os
4
  from gtts import gTTS
5
+ import whisper
 
6
  import io
7
  from tempfile import NamedTemporaryFile
8
 
9
  api = os.getenv("HF_API_TOKEN")
10
  client = InferenceClient("meta-llama/Meta-Llama-3.1-70B-Instruct", token=f"{api}")
11
 
12
+ # Load Whisper model
13
+ model = whisper.load_model("base") # or use 'small', 'medium', 'large', depending on your needs
14
+
15
  def respond(
16
  message,
17
  history: list[tuple[str, str]],
 
52
  return f.read()
53
 
54
  def speech_to_text(audio):
55
+ # Load audio data into a temporary file
56
+ with NamedTemporaryFile(delete=True, suffix=".wav") as tmpfile:
57
+ tmpfile.write(audio)
58
+ tmpfile.flush()
59
+
60
+ # Transcribe audio with Whisper
61
+ result = model.transcribe(tmpfile.name)
62
+ return result['text']
63
 
64
  def process_audio(audio, system_message, max_tokens, temperature, top_p):
65
  text = speech_to_text(audio)