Spaces:

AI-Edify
/

Pronounciation_Assistant

Sleeping

App Files Files Community

AI-Edify committed on Sep 15, 2024

Commit

2516100

verified ·

1 Parent(s): a83e487

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -38

app.py CHANGED Viewed

@@ -3,10 +3,9 @@ import gradio as gr
 import openai
 import speech_recognition as sr
 import logging
-import traceback
 # Set up logging
-logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)
 # Set OpenAI API key
@@ -28,8 +27,6 @@ def generate_text():
 def get_pronunciation_feedback(original_text, transcription):
     try:
-        logger.debug(f"Original text: {original_text}")
-        logger.debug(f"Transcription: {transcription}")
         response = openai.ChatCompletion.create(
             model="gpt-3.5-turbo",
             messages=[
@@ -37,73 +34,73 @@ def get_pronunciation_feedback(original_text, transcription):
                 {"role": "user", "content": f"Original text: '{original_text}'\nTranscription: '{transcription}'\nProvide pronunciation feedback."}
             ]
         )
-        feedback = response.choices[0].message['content']
-        logger.debug(f"Generated feedback: {feedback}")
-        return feedback
     except Exception as e:
         logger.error(f"Error in get_pronunciation_feedback: {str(e)}")
-        logger.error(traceback.format_exc())
         return "Error generating feedback. Please try again."
 def transcribe_audio_realtime(audio):
     try:
-        logger.debug(f"Received audio file: {audio}")
         recognizer = sr.Recognizer()
         with sr.AudioFile(audio) as source:
-            logger.debug("Reading audio file")
             audio_data = recognizer.record(source)
-        logger.debug("Transcribing audio")
-        transcription = recognizer.recognize_google(audio_data)
-        logger.debug(f"Transcription result: {transcription}")
-        return transcription
     except sr.UnknownValueError:
-        logger.warning("Could not understand audio")
         return "Could not understand audio"
     except sr.RequestError as e:
         logger.error(f"Could not request results from the speech recognition service; {str(e)}")
         return "Error in speech recognition service"
     except Exception as e:
         logger.error(f"Error in transcribe_audio_realtime: {str(e)}")
-        logger.error(traceback.format_exc())
         return "Error transcribing audio. Please try again."
 def practice_pronunciation(audio, text_to_read):
-    logger.info("Starting practice_pronunciation function")
     if not text_to_read:
-        logger.info("Generating new text to read")
         text_to_read = generate_text()
-    logger.info(f"Text to read: {text_to_read}")
-    logger.info("Starting transcription")
     transcription = transcribe_audio_realtime(audio)
-    logger.info(f"Transcription result: {transcription}")
-    logger.info("Getting pronunciation feedback")
     feedback = get_pronunciation_feedback(text_to_read, transcription)
-    logger.info(f"Feedback generated: {feedback}")
     return text_to_read, transcription, feedback
-# Gradio interface
-with gr.Blocks() as demo:
-    gr.Markdown("# Pronunciation Practice Tool")
-    gr.Markdown("Generate a text to read, then record yourself reading it. The system will provide pronunciation feedback.")
-    with gr.Row():
-        text_to_read = gr.Textbox(label="Text to Read")
-        generate_button = gr.Button("Generate New Text")
-    audio_input = gr.Audio(type="filepath", label="Record your voice")
-    with gr.Row():
-        transcription_output = gr.Textbox(label="Your Transcription")
-        feedback_output = gr.Textbox(label="Pronunciation Feedback")
-    submit_button = gr.Button("Submit")
     generate_button.click(generate_text, outputs=text_to_read)
     submit_button.click(practice_pronunciation, inputs=[audio_input, text_to_read], outputs=[text_to_read, transcription_output, feedback_output])
 # Launch the app
 if __name__ == "__main__":
     demo.launch()

 import openai
 import speech_recognition as sr
 import logging
 # Set up logging
+logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 # Set OpenAI API key
 def get_pronunciation_feedback(original_text, transcription):
     try:
         response = openai.ChatCompletion.create(
             model="gpt-3.5-turbo",
             messages=[
                 {"role": "user", "content": f"Original text: '{original_text}'\nTranscription: '{transcription}'\nProvide pronunciation feedback."}
             ]
         )
+        return response.choices[0].message['content']
     except Exception as e:
         logger.error(f"Error in get_pronunciation_feedback: {str(e)}")
         return "Error generating feedback. Please try again."
 def transcribe_audio_realtime(audio):
     """Transcribe a recorded audio file to text via Google speech recognition.

     Args:
         audio: Path to the audio file to transcribe. A falsy value (``None``
             or an empty string) is treated as "nothing was recorded".
             NOTE(review): presumably Gradio passes ``None`` when the user
             submits without recording -- confirm against the Gradio version
             in use.

     Returns:
         The recognised text on success; otherwise a short user-facing
         message describing the failure. Every ``Exception`` raised while
         reading or recognising the audio is caught and converted into such
         a message.
     """
     # Without a recording there is nothing to open; say so explicitly instead
     # of letting sr.AudioFile fail and reporting a generic transcription error.
     if not audio:
         return "No audio provided. Please record your voice and try again."
     try:
         recognizer = sr.Recognizer()
         with sr.AudioFile(audio) as source:
             audio_data = recognizer.record(source)
         return recognizer.recognize_google(audio_data)
     except sr.UnknownValueError:
         return "Could not understand audio"
     except sr.RequestError as e:
         logger.error("Could not request results from the speech recognition service; %s", e)
         return "Error in speech recognition service"
     except Exception as e:
         # logger.exception keeps the same message but also records the
         # traceback, so unexpected failures remain diagnosable from the logs.
         logger.exception("Error in transcribe_audio_realtime: %s", e)
         return "Error transcribing audio. Please try again."
 def practice_pronunciation(audio, text_to_read):
     """Run one practice round: resolve the text, transcribe, and get feedback.

     Args:
         audio: Path to the user's recording; falsy when nothing was recorded.
         text_to_read: The text the user was asked to read. When empty, a new
             text is obtained from ``generate_text()``.

     Returns:
         A ``(text_to_read, transcription, feedback)`` tuple, in the order the
         submit handler maps onto its three output components.
     """
     if not text_to_read:
         text_to_read = generate_text()
     # With no recording there is nothing to transcribe or evaluate: return
     # early so an error string is never sent to the feedback model as if it
     # were the user's speech.
     if not audio:
         return text_to_read, "No audio provided. Please record your voice and try again.", ""
     transcription = transcribe_audio_realtime(audio)
     feedback = get_pronunciation_feedback(text_to_read, transcription)
     return text_to_read, transcription, feedback
+# Custom CSS for improved styling
+custom_css = """
+.container {max-width: 800px; margin: auto; padding: 20px;}
+.title {text-align: center; color: #2c3e50; margin-bottom: 20px;}
+.subtitle {text-align: center; color: #34495e; margin-bottom: 30px;}
+.input-section, .output-section {background-color: #ecf0f1; padding: 20px; border-radius: 10px; margin-bottom: 20px;}
+.input-section h3, .output-section h3 {color: #2980b9; margin-bottom: 10px;}
+.button-primary {background-color: #3498db !important;}
+.button-secondary {background-color: #2ecc71 !important;}
+"""
+# Gradio interface with improved UI
+with gr.Blocks(css=custom_css) as demo:
+    gr.HTML("<div class='container'>")
+    gr.HTML("<h1 class='title'>Pronunciation Practice Tool</h1>")
+    gr.HTML("<p class='subtitle'>Improve your English pronunciation with AI-powered feedback</p>")
+    with gr.Box(className="input-section"):
+        gr.HTML("<h3>Step 1: Get Text to Read</h3>")
+        with gr.Row():
+            text_to_read = gr.Textbox(label="Text to Read", placeholder="Click 'Generate New Text' or type your own text here")
+            generate_button = gr.Button("Generate New Text", variant="primary", className="button-primary")
+    with gr.Box(className="input-section"):
+        gr.HTML("<h3>Step 2: Record Your Voice</h3>")
+        audio_input = gr.Audio(type="filepath", label="Record your voice reading the text above")
+    with gr.Box(className="output-section"):
+        gr.HTML("<h3>Step 3: Get Feedback</h3>")
+        with gr.Row():
+            transcription_output = gr.Textbox(label="Your Transcription", lines=3)
+            feedback_output = gr.Textbox(label="Pronunciation Feedback", lines=5)
+    submit_button = gr.Button("Submit for Feedback", variant="secondary", className="button-secondary")
     generate_button.click(generate_text, outputs=text_to_read)
     submit_button.click(practice_pronunciation, inputs=[audio_input, text_to_read], outputs=[text_to_read, transcription_output, feedback_output])
+gr.HTML("</div>")
 # Launch the app only when this file is run as a script, not when it is imported
 if __name__ == "__main__":
     demo.launch()