Spaces:

divakaivan
/

korean_voice_assistant

Sleeping

divakaivan committed on Jul 18, 2024

Commit

a20e4bb

verified ·

1 Parent(s): 1ede7b6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,7 +2,8 @@ from transformers import pipeline
 import gradio as gr
 from gtts import gTTS
 from openai import OpenAI
-client = OpenAI()
 # Load the Whisper model for speech-to-text
 pipe = pipeline(model="openai/whisper-small")
@@ -10,7 +11,8 @@ pipe = pipeline(model="openai/whisper-small")
 # Load the text generation model
 # text_pipe = pipeline("text2text-generation", model="google/flan-t5-base")
-def generate_gpt_response(text):
     response = client.chat.completions.create(
         model='gpt-3.5-turbo-0125',
         messages=[{"role": "user", "content": text}]
@@ -18,13 +20,13 @@ def generate_gpt_response(text):
     return response.choices[0].message.content
-def transcribe(audio):
     # Transcribe the audio to text
     text = pipe(audio)["text"]
     # Generate a response from the transcribed text
     # lm_response = text_pipe(text)[0]["generated_text"]
-    lm_response = generate_gpt_response(text)
     # Convert the response text to speech
     tts = gTTS(lm_response, lang='ko')
@@ -37,7 +39,10 @@ def transcribe(audio):
 # Create the Gradio interface
 iface = gr.Interface(
     fn=transcribe,
-    inputs=gr.Audio(type="filepath"),
     outputs=gr.Audio(type="filepath"),
     title="Whisper Small Glaswegian",
     description="Realtime demo for Glaswegian speech recognition using a fine-tuned Whisper small model."

 import gradio as gr
 from gtts import gTTS
 from openai import OpenAI
 # Load the Whisper model for speech-to-text
 pipe = pipeline(model="openai/whisper-small")
 # Load the text generation model
 # text_pipe = pipeline("text2text-generation", model="google/flan-t5-base")
+def generate_gpt_response(text, api_key):
+    client = OpenAI(api_key=api_key)
     response = client.chat.completions.create(
         model='gpt-3.5-turbo-0125',
         messages=[{"role": "user", "content": text}]
     return response.choices[0].message.content
+def transcribe(audio, api_key):
     # Transcribe the audio to text
     text = pipe(audio)["text"]
     # Generate a response from the transcribed text
     # lm_response = text_pipe(text)[0]["generated_text"]
+    lm_response = generate_gpt_response(text, api_key)
     # Convert the response text to speech
     tts = gTTS(lm_response, lang='ko')
 # Create the Gradio interface
 iface = gr.Interface(
     fn=transcribe,
+    inputs=[
+        gr.Audio(type="filepath"),
+        gr.Textbox(label="OpenAI API Key", type="password")  # Add a textbox for the API key
+    ],
     outputs=gr.Audio(type="filepath"),
     title="Whisper Small Glaswegian",
     description="Realtime demo for Glaswegian speech recognition using a fine-tuned Whisper small model."