import os
import time

import gradio as gr
import speech_recognition as sr
from openai import OpenAI

# Initialize the OpenAI client with the API key from the environment variable
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Create an assistant that reviews the learner's pronunciation
assistant = client.beta.assistants.create(
    name="Pronunciation Assistant",
    instructions="You are a helpful pronunciation assistant. You compare the generated text with the user's transcription and then provide feedback on how the user can improve their pronunciation accordingly. You also single out specific words they pronounced incorrectly and give tips on how to improve; for example, 'schedule' can be pronounced as 'sked-jool'.",
    model="gpt-4-1106-preview"
)
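
# Ask a chat model for a short practice paragraph for the learner to read aloud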
def generate_text():
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "Generate a short paragraph (2-3 sentences) for an English learner to read aloud."},
            {"role": "user", "content": "Create a practice text."}
        ]
    )
    return response.choices[0].message.content
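
# Send the original text and the transcription to the assistant and return its feedback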
def get_pronunciation_feedback(original_text, transcription):
    thread = client.beta.threads.create()
    client.beta.threads.messages.create(
        thread_id=thread.id,
        role="user",
        content=f"Original text: '{original_text}'\nTranscription: '{transcription}'\nProvide pronunciation feedback."
    )
    run = client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=assistant.id
    )
    # Poll until the assistant run has finished
    while run.status != "completed":
        time.sleep(1)
        run = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
    # Messages are returned newest first, so the first entry is the assistant's reply
    messages = client.beta.threads.messages.list(thread_id=thread.id)
    return messages.data[0].content[0].text.value
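
# Transcribe the recorded audio with the Google Web Speech API via the speech_recognition library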
def transcribe_audio_realtime(audio):
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError:
        return "Could not request results from the speech recognition service"
def practice_pronunciation(audio):
    original_text = generate_text()
    transcription = transcribe_audio_realtime(audio)
    feedback = get_pronunciation_feedback(original_text, transcription)
    return original_text, transcription, feedback

# Gradio interface
demo = gr.Interface(
    fn=practice_pronunciation,
    inputs=[
        gr.Audio(type="filepath")  # Recorded or uploaded audio is passed to the function as a file path
    ],
    outputs=[
        gr.Textbox(label="Text to Read"),
        gr.Textbox(label="Your Transcription"),
        gr.Textbox(label="Pronunciation Feedback")
    ],
    title="Pronunciation Practice Tool",
    description="Read the generated text aloud. The system will transcribe your speech and provide pronunciation feedback.",
    live=True
)

# Launch the app
if __name__ == "__main__":
    demo.launch()