Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
-
import os
|
2 |
import gradio as gr
|
3 |
import openai
|
4 |
import speech_recognition as sr
|
5 |
import logging
|
|
|
6 |
|
7 |
# Set up logging
|
8 |
-
logging.basicConfig(level=logging.
|
9 |
logger = logging.getLogger(__name__)
|
10 |
|
11 |
# Set OpenAI API key
|
@@ -27,6 +27,8 @@ def generate_text():
|
|
27 |
|
28 |
def get_pronunciation_feedback(original_text, transcription):
|
29 |
try:
|
|
|
|
|
30 |
response = openai.ChatCompletion.create(
|
31 |
model="gpt-3.5-turbo",
|
32 |
messages=[
|
@@ -34,73 +36,52 @@ def get_pronunciation_feedback(original_text, transcription):
|
|
34 |
{"role": "user", "content": f"Original text: '{original_text}'\nTranscription: '{transcription}'\nProvide pronunciation feedback."}
|
35 |
]
|
36 |
)
|
37 |
-
|
|
|
|
|
38 |
except Exception as e:
|
39 |
logger.error(f"Error in get_pronunciation_feedback: {str(e)}")
|
|
|
40 |
return "Error generating feedback. Please try again."
|
41 |
|
42 |
def transcribe_audio_realtime(audio):
|
|
|
|
|
|
|
43 |
try:
|
|
|
44 |
recognizer = sr.Recognizer()
|
45 |
with sr.AudioFile(audio) as source:
|
|
|
46 |
audio_data = recognizer.record(source)
|
47 |
-
|
|
|
|
|
|
|
48 |
except sr.UnknownValueError:
|
|
|
49 |
return "Could not understand audio"
|
50 |
except sr.RequestError as e:
|
51 |
logger.error(f"Could not request results from the speech recognition service; {str(e)}")
|
52 |
return "Error in speech recognition service"
|
53 |
except Exception as e:
|
54 |
logger.error(f"Error in transcribe_audio_realtime: {str(e)}")
|
|
|
55 |
return "Error transcribing audio. Please try again."
|
56 |
|
57 |
def practice_pronunciation(audio, text_to_read):
|
|
|
58 |
if not text_to_read:
|
|
|
59 |
text_to_read = generate_text()
|
60 |
-
|
61 |
-
feedback = get_pronunciation_feedback(text_to_read, transcription)
|
62 |
-
return text_to_read, transcription, feedback
|
63 |
-
|
64 |
-
# Custom CSS for improved styling
|
65 |
-
custom_css = """
|
66 |
-
.container {max-width: 800px; margin: auto; padding: 20px;}
|
67 |
-
.title {text-align: center; color: #2c3e50; margin-bottom: 20px;}
|
68 |
-
.subtitle {text-align: center; color: #34495e; margin-bottom: 30px;}
|
69 |
-
.input-section, .output-section {background-color: #ecf0f1; padding: 20px; border-radius: 10px; margin-bottom: 20px;}
|
70 |
-
.input-section h3, .output-section h3 {color: #2980b9; margin-bottom: 10px;}
|
71 |
-
.button-primary {background-color: #3498db !important;}
|
72 |
-
.button-secondary {background-color: #2ecc71 !important;}
|
73 |
-
"""
|
74 |
-
|
75 |
-
# Gradio interface with improved UI
|
76 |
-
with gr.Blocks(css=custom_css) as demo:
|
77 |
-
gr.HTML("<div class='container'>")
|
78 |
-
gr.HTML("<h1 class='title'>Pronunciation Practice Tool</h1>")
|
79 |
-
gr.HTML("<p class='subtitle'>Improve your English pronunciation with AI-powered feedback</p>")
|
80 |
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
text_to_read = gr.Textbox(label="Text to Read", placeholder="Click 'Generate New Text' or type your own text here")
|
85 |
-
generate_button = gr.Button("Generate New Text", variant="primary", className="button-primary")
|
86 |
-
|
87 |
-
with gr.Box(className="input-section"):
|
88 |
-
gr.HTML("<h3>Step 2: Record Your Voice</h3>")
|
89 |
-
audio_input = gr.Audio(type="filepath", label="Record your voice reading the text above")
|
90 |
-
|
91 |
-
with gr.Box(className="output-section"):
|
92 |
-
gr.HTML("<h3>Step 3: Get Feedback</h3>")
|
93 |
-
with gr.Row():
|
94 |
-
transcription_output = gr.Textbox(label="Your Transcription", lines=3)
|
95 |
-
feedback_output = gr.Textbox(label="Pronunciation Feedback", lines=5)
|
96 |
|
97 |
-
|
|
|
|
|
98 |
|
99 |
-
|
100 |
-
submit_button.click(practice_pronunciation, inputs=[audio_input, text_to_read], outputs=[text_to_read, transcription_output, feedback_output])
|
101 |
-
|
102 |
-
gr.HTML("</div>")
|
103 |
-
|
104 |
-
# Launch the app
|
105 |
-
if __name__ == "__main__":
|
106 |
-
demo.launch()
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import openai
|
3 |
import speech_recognition as sr
|
4 |
import logging
|
5 |
+
import traceback
|
6 |
|
7 |
# Set up logging
|
8 |
+
logging.basicConfig(level=logging.DEBUG)
|
9 |
logger = logging.getLogger(__name__)
|
10 |
|
11 |
# Set OpenAI API key
|
|
|
27 |
|
28 |
def get_pronunciation_feedback(original_text, transcription):
|
29 |
try:
|
30 |
+
logger.debug(f"Original text: {original_text}")
|
31 |
+
logger.debug(f"Transcription: {transcription}")
|
32 |
response = openai.ChatCompletion.create(
|
33 |
model="gpt-3.5-turbo",
|
34 |
messages=[
|
|
|
36 |
{"role": "user", "content": f"Original text: '{original_text}'\nTranscription: '{transcription}'\nProvide pronunciation feedback."}
|
37 |
]
|
38 |
)
|
39 |
+
feedback = response.choices[0].message['content']
|
40 |
+
logger.debug(f"Generated feedback: {feedback}")
|
41 |
+
return feedback
|
42 |
except Exception as e:
|
43 |
logger.error(f"Error in get_pronunciation_feedback: {str(e)}")
|
44 |
+
logger.error(traceback.format_exc())
|
45 |
return "Error generating feedback. Please try again."
|
46 |
|
47 |
def transcribe_audio_realtime(audio):
    """Transcribe a recorded audio file to text with Google Speech Recognition.

    Args:
        audio: Path to the audio file to transcribe (presumably the filepath
            handed over by a Gradio Audio component — TODO confirm).

    Returns:
        str: The transcription on success, or a human-readable error message
        on failure (callers display this string directly, so errors are
        returned rather than raised).
    """
    try:
        # Lazy %-style args avoid string formatting when DEBUG is disabled.
        logger.debug("Received audio file: %s", audio)
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio) as source:
            logger.debug("Reading audio file")
            audio_data = recognizer.record(source)
        logger.debug("Transcribing audio")
        transcription = recognizer.recognize_google(audio_data)
        logger.debug("Transcription result: %s", transcription)
        return transcription
    except sr.UnknownValueError:
        # Speech was present but unintelligible to the recognizer.
        logger.warning("Could not understand audio")
        return "Could not understand audio"
    except sr.RequestError as e:
        # Network/API failure reaching the recognition service.
        logger.error("Could not request results from the speech recognition service; %s", e)
        return "Error in speech recognition service"
    except Exception:
        # logger.exception records the message plus the full traceback in one
        # call, replacing the error + traceback.format_exc() pair.
        logger.exception("Error in transcribe_audio_realtime")
        return "Error transcribing audio. Please try again."
|
71 |
|
72 |
def practice_pronunciation(audio, text_to_read):
    """Run one full practice round: transcribe *audio*, then score it.

    Args:
        audio: Audio input forwarded unchanged to transcribe_audio_realtime
            (presumably a filepath — TODO confirm against the Gradio wiring).
        text_to_read: Reference text the user was asked to read; when empty
            or None a fresh text is produced via generate_text().

    Returns:
        tuple: (text_to_read, transcription, feedback) — all strings, in the
        order the Gradio outputs expect.
    """
    logger.info("Starting practice_pronunciation function")

    # Fall back to a generated prompt when the user supplied no text.
    if not text_to_read:
        logger.info("Generating new text to read")
        text_to_read = generate_text()
    # Lazy %-style args avoid formatting cost when INFO is disabled.
    logger.info("Text to read: %s", text_to_read)

    logger.info("Starting transcription")
    transcription = transcribe_audio_realtime(audio)
    logger.info("Transcription result: %s", transcription)

    logger.info("Getting pronunciation feedback")
    feedback = get_pronunciation_feedback(text_to_read, transcription)
    logger.info("Feedback generated: %s", feedback)

    return text_to_read, transcription, feedback
|
|
|
|
|
|
|
|
|
|
|
|
|
|