Spaces:

divakaivan
/

korean_voice_assistant

Sleeping

korean_voice_assistant / app.py

Update app.py

a20e4bb verified over 1 year ago

1.5 kB

	from transformers import pipeline
	import gradio as gr
	from gtts import gTTS
	from openai import OpenAI



	# Speech-to-text: the Whisper small checkpoint, loaded once at import time
	# and shared by every request.
	WHISPER_MODEL_ID = "openai/whisper-small"
	pipe = pipeline(model=WHISPER_MODEL_ID)

	# Disabled alternative: a local text2text model for generating replies.
	# Replies are currently produced through the OpenAI API instead.
	# text_pipe = pipeline("text2text-generation", model="google/flan-t5-base")

	def generate_gpt_response(text, api_key):
	    """Return the chat model's reply to a single user message.

	    Args:
	        text: Content sent as the only ``user`` message of the conversation.
	        api_key: OpenAI API key used to build the client for this call.

	    Returns:
	        The message content of the first completion choice.
	    """
	    conversation = [{"role": "user", "content": text}]
	    completion = OpenAI(api_key=api_key).chat.completions.create(
	        model='gpt-3.5-turbo-0125',
	        messages=conversation,
	    )
	    first_choice = completion.choices[0]
	    return first_choice.message.content

	def transcribe(audio, api_key):
	    """Answer a spoken prompt with synthesized Korean speech.

	    The audio is transcribed with the module-level Whisper pipeline, the
	    transcript is sent to the chat model, and the reply is converted to an
	    MP3 with gTTS (``lang='ko'``).

	    Args:
	        audio: Path to the recorded/uploaded audio file, as supplied by the
	            ``gr.Audio(type="filepath")`` input. Empty when nothing was
	            recorded.
	        api_key: OpenAI API key entered by the user.

	    Returns:
	        Path to a newly created MP3 file containing the spoken reply.

	    Raises:
	        gr.Error: If the audio or API key is missing, if no speech was
	            transcribed, or if the chat model returned an empty reply.
	    """
	    # Function-scope import keeps this block self-contained.
	    import tempfile

	    # Fail early with a readable message instead of an opaque error from
	    # deep inside the speech pipeline or the OpenAI client.
	    if not audio:
	        raise gr.Error("Please record or upload some audio first.")
	    if not api_key or not api_key.strip():
	        raise gr.Error("Please enter your OpenAI API key.")

	    # Transcribe the audio to text
	    text = pipe(audio)["text"]
	    if not text or not text.strip():
	        raise gr.Error("No speech was detected in the audio.")

	    # Generate a response from the transcribed text
	    lm_response = generate_gpt_response(text, api_key)
	    # gTTS refuses empty text, so surface that case explicitly.
	    if not lm_response or not lm_response.strip():
	        raise gr.Error("The language model returned an empty response.")

	    # Convert the response text to speech
	    tts = gTTS(lm_response, lang='ko')

	    # Use a unique file per request: a single fixed filename would let
	    # overlapping requests overwrite each other's audio. The file is left on
	    # disk (delete=False) so it can be served after this function returns.
	    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
	        out_audio = tmp.name
	    tts.save(out_audio)

	    return out_audio

	# Build the Gradio interface: recorded/uploaded audio plus the user's own
	# OpenAI API key go in, the spoken Korean reply comes out.
	iface = gr.Interface(
	    fn=transcribe,
	    inputs=[
	        gr.Audio(type="filepath"),
	        # Masked input so the key is not shown on screen while typing.
	        gr.Textbox(label="OpenAI API Key", type="password"),
	    ],
	    outputs=gr.Audio(type="filepath"),
	    # Title and description state what the app actually does; the previous
	    # text described an unrelated Glaswegian speech-recognition demo.
	    title="Korean Voice Assistant",
	    description=(
	        "Speak a prompt: it is transcribed with Whisper small, answered by "
	        "GPT-3.5, and read back aloud in Korean. Requires your own OpenAI "
	        "API key."
	    ),
	)

	# Launch the interface
	iface.launch(share=True)