Spaces:

CereusTech
/

Profero

Sleeping

App Files Files Community

Profero / app.py

Afeezee

Update app.py

d8f63f1 verified 4 months ago

raw

history blame contribute delete

5.03 kB

	import io
	import gradio as gr
	import numpy as np
	from groq import Groq
	from difflib import SequenceMatcher
	import soundfile as sf
	import os

	# The Groq API key is read from the "Profero" environment variable
	# (None when the variable is not set).
	api_key = os.environ.get("Profero")

	# One Groq client shared by the chat-completion and transcription calls
	client = Groq(api_key=api_key)

	# Module-level tally: correct answers so far and total submissions so far
	score = attempts = 0

	# Function to generate a word using Llama model
	def generate_word():
	    """Ask the Llama chat model for a single word or short phrase to pronounce.

	    The completion is requested as a stream; the text pieces of every chunk
	    are concatenated, then surrounding whitespace and double quotes are
	    stripped from the result.

	    Returns:
	        The generated word or phrase. If anything raises while requesting or
	        reading the completion, the string "Error generating word: <exception>"
	        is returned instead of propagating the exception.
	    """
	    system_prompt = "You are an experienced English professor with over 20 years experience teaching English and you are also a native speaker. You are trying to teach proper English pronunciation by generating words or phrases for user to pronounce and you judge them if it is correct or not. Make sure just a single word or a very concise phrase. Don't mention any other word apart from the word generated."
	    chat_messages = [
	        {"role": "system", "content": system_prompt},
	        {"role": "user", "content": "Generate a word for pronunciation."},
	    ]
	    try:
	        stream = client.chat.completions.create(
	            model="llama-3.3-70b-versatile",
	            messages=chat_messages,
	            temperature=1.4,
	            max_tokens=4096,
	            top_p=1,
	            stream=True,
	        )
	        # Chunks whose delta carries no text (None or "") contribute nothing.
	        pieces = [chunk.choices[0].delta.content or "" for chunk in stream]
	        return "".join(pieces).strip().strip('"')
	    except Exception as e:
	        return f"Error generating word: {e}"

	# Function to check pronunciation
	def _normalize_for_comparison(text):
	    """Lower-case *text*, drop punctuation and collapse runs of whitespace."""
	    kept = "".join(ch for ch in text.lower() if ch.isalnum() or ch.isspace())
	    return " ".join(kept.split())


	def check_pronunciation(audio, word):
	    """Transcribe the user's audio and score it against the expected word.

	    Args:
	        audio: Value delivered by the audio input. A ``(sample_rate, samples)``
	            tuple is encoded as WAV in memory at its own sample rate. Any
	            other non-None value is forwarded unchanged as the contents of
	            an .m4a upload. ``None`` means nothing was recorded or uploaded.
	        word: The word or phrase the user was asked to pronounce.

	    Returns:
	        A ``(result_text, score)`` pair matching the two output components
	        the submit button is wired to. ``score`` is the module-level count
	        of correct answers after this submission.

	    Side effects:
	        Increments the module-level ``attempts`` for every submission that
	        carries audio, and ``score`` when the transcription is similar
	        enough to ``word``. A submission without audio changes neither.
	    """
	    global score, attempts

	    # Nothing recorded/uploaded: tell the user instead of failing later,
	    # and do not count it as an attempt.
	    if audio is None:
	        return "No audio received. Please record or upload your pronunciation first.", score

	    attempts += 1
	    try:
	        if isinstance(audio, tuple):
	            sample_rate, samples = audio
	            # Encode in memory with the clip's real sample rate. A fixed
	            # rate would change the speed/pitch of audio captured at any
	            # other rate, and a fixed on-disk filename would be shared by
	            # concurrent requests.
	            wav_buffer = io.BytesIO()
	            sf.write(wav_buffer, samples, samplerate=sample_rate, format="WAV")
	            upload = ("user_audio.wav", wav_buffer.getvalue())
	        else:
	            # Already-encoded audio is passed through under an .m4a name.
	            upload = ("user_audio.m4a", audio)

	        # Transcribe using Groq's Whisper API
	        transcription = client.audio.transcriptions.create(
	            file=upload,
	            model="distil-whisper-large-v3-en",
	            temperature=0.28,
	            response_format="verbose_json",
	        )
	        transcription_text = transcription.text.strip()

	        # Compare on normalized text so that casing, surrounding spaces and
	        # punctuation in the transcript do not count as mispronunciation.
	        similarity = SequenceMatcher(
	            None,
	            _normalize_for_comparison(transcription_text),
	            _normalize_for_comparison(word),
	        ).ratio()
	        if similarity > 0.8:  # Threshold for correct pronunciation
	            score += 1
	            result_text = f"Correct! Expected: {word}. You said: {transcription_text}"
	        else:
	            result_text = f"Incorrect. Expected: {word}. You said: {transcription_text}"

	        return result_text, score
	    except Exception as e:
	        # Exactly two values: the handler feeds two output components.
	        return f"Error checking pronunciation: {e}", score

	# Function to reset the test and display percentage
	def reset_test():
	    """Summarize the current session and zero the counters.

	    Reads the module-level ``score`` and ``attempts``, builds a summary with
	    the raw tally and the success percentage (0 when there were no attempts),
	    then resets both counters to 0.

	    Returns:
	        The summary string, e.g. "Your final score is 3/4. Percentage: 75.00%".
	    """
	    global score, attempts
	    correct, total = score, attempts
	    percentage = (correct / total) * 100 if total > 0 else 0
	    score = attempts = 0
	    return (
	        f"Your final score is {correct}/{total}. "
	        f"Percentage: {percentage:.2f}%"
	    )


	# Gradio Interface
	with gr.Blocks() as interface:
	    gr.HTML("""
	<h1 style="text-align: center; font-weight: bold;">Profero</h1>
	<p style="text-align: center;">Profero is an interactive application designed to help users improve their English pronunciation skills. Users can practice pronouncing words generated by an advanced language model and receive immediate feedback on their performance. The application provides real-time transcription, scoring, and feedback to enhance learning and accuracy. You can upload a .WAV audio file or use your microphone to pronounce the word displayed. When using a microphone, make sure to trim your sound before submitting.</p>
	""")
	    # Passing the function itself (not its result) as the value lets Gradio
	    # call it on each app load, so every visitor starts with a fresh word
	    # and no model request is made while this module is being executed.
	    word_output = gr.Textbox(label="Word to Pronounce", value=generate_word)
	    result_output = gr.Textbox(label="Result")
	    score_output = gr.Textbox(label="Score")

	    # Generate new word on button click
	    word_button = gr.Button("Get New Word")
	    word_button.click(fn=generate_word, outputs=word_output)

	    # Audio input for pronunciation checking (microphone or uploaded file)
	    audio_input = gr.Audio(type="numpy")
	    submit_button = gr.Button("Submit Pronunciation")
	    submit_button.click(
	        fn=check_pronunciation,
	        inputs=[audio_input, word_output],
	        outputs=[result_output, score_output],
	    )

	    # Reset button to stop and show score
	    stop_button = gr.Button("Stop")
	    stop_button.click(fn=reset_test, outputs=score_output)

	# Start the server only when run as a script, so importing this module
	# (e.g. from a test or another tool) does not launch the app.
	if __name__ == "__main__":
	    interface.launch()