Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,10 +3,9 @@ import gradio as gr
|
|
3 |
import openai
|
4 |
import speech_recognition as sr
|
5 |
import logging
|
6 |
-
import traceback
|
7 |
|
8 |
# Set up logging
|
9 |
-
logging.basicConfig(level=logging.
|
10 |
logger = logging.getLogger(__name__)
|
11 |
|
12 |
# Set OpenAI API key
|
@@ -28,8 +27,6 @@ def generate_text():
|
|
28 |
|
29 |
def get_pronunciation_feedback(original_text, transcription):
|
30 |
try:
|
31 |
-
logger.debug(f"Original text: {original_text}")
|
32 |
-
logger.debug(f"Transcription: {transcription}")
|
33 |
response = openai.ChatCompletion.create(
|
34 |
model="gpt-3.5-turbo",
|
35 |
messages=[
|
@@ -37,73 +34,73 @@ def get_pronunciation_feedback(original_text, transcription):
|
|
37 |
{"role": "user", "content": f"Original text: '{original_text}'\nTranscription: '{transcription}'\nProvide pronunciation feedback."}
|
38 |
]
|
39 |
)
|
40 |
-
|
41 |
-
logger.debug(f"Generated feedback: {feedback}")
|
42 |
-
return feedback
|
43 |
except Exception as e:
|
44 |
logger.error(f"Error in get_pronunciation_feedback: {str(e)}")
|
45 |
-
logger.error(traceback.format_exc())
|
46 |
return "Error generating feedback. Please try again."
|
47 |
|
48 |
def transcribe_audio_realtime(audio):
|
49 |
try:
|
50 |
-
logger.debug(f"Received audio file: {audio}")
|
51 |
recognizer = sr.Recognizer()
|
52 |
with sr.AudioFile(audio) as source:
|
53 |
-
logger.debug("Reading audio file")
|
54 |
audio_data = recognizer.record(source)
|
55 |
-
|
56 |
-
transcription = recognizer.recognize_google(audio_data)
|
57 |
-
logger.debug(f"Transcription result: {transcription}")
|
58 |
-
return transcription
|
59 |
except sr.UnknownValueError:
|
60 |
-
logger.warning("Could not understand audio")
|
61 |
return "Could not understand audio"
|
62 |
except sr.RequestError as e:
|
63 |
logger.error(f"Could not request results from the speech recognition service; {str(e)}")
|
64 |
return "Error in speech recognition service"
|
65 |
except Exception as e:
|
66 |
logger.error(f"Error in transcribe_audio_realtime: {str(e)}")
|
67 |
-
logger.error(traceback.format_exc())
|
68 |
return "Error transcribing audio. Please try again."
|
69 |
|
70 |
def practice_pronunciation(audio, text_to_read):
|
71 |
-
logger.info("Starting practice_pronunciation function")
|
72 |
if not text_to_read:
|
73 |
-
logger.info("Generating new text to read")
|
74 |
text_to_read = generate_text()
|
75 |
-
logger.info(f"Text to read: {text_to_read}")
|
76 |
-
|
77 |
-
logger.info("Starting transcription")
|
78 |
transcription = transcribe_audio_realtime(audio)
|
79 |
-
logger.info(f"Transcription result: {transcription}")
|
80 |
-
|
81 |
-
logger.info("Getting pronunciation feedback")
|
82 |
feedback = get_pronunciation_feedback(text_to_read, transcription)
|
83 |
-
logger.info(f"Feedback generated: {feedback}")
|
84 |
-
|
85 |
return text_to_read, transcription, feedback
|
86 |
|
87 |
-
#
|
88 |
-
|
89 |
-
|
90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
|
92 |
-
with gr.
|
93 |
-
|
94 |
-
|
|
|
|
|
95 |
|
96 |
-
|
|
|
|
|
97 |
|
98 |
-
with gr.
|
99 |
-
|
100 |
-
|
|
|
|
|
101 |
|
102 |
-
submit_button = gr.Button("Submit")
|
103 |
|
104 |
generate_button.click(generate_text, outputs=text_to_read)
|
105 |
submit_button.click(practice_pronunciation, inputs=[audio_input, text_to_read], outputs=[text_to_read, transcription_output, feedback_output])
|
106 |
|
|
|
|
|
107 |
# Launch the app
|
108 |
if __name__ == "__main__":
|
109 |
demo.launch()
|
|
|
3 |
import openai
|
4 |
import speech_recognition as sr
|
5 |
import logging
|
|
|
6 |
|
7 |
# Set up logging
|
8 |
+
logging.basicConfig(level=logging.INFO)
|
9 |
logger = logging.getLogger(__name__)
|
10 |
|
11 |
# Set OpenAI API key
|
|
|
27 |
|
28 |
def get_pronunciation_feedback(original_text, transcription):
|
29 |
try:
|
|
|
|
|
30 |
response = openai.ChatCompletion.create(
|
31 |
model="gpt-3.5-turbo",
|
32 |
messages=[
|
|
|
34 |
{"role": "user", "content": f"Original text: '{original_text}'\nTranscription: '{transcription}'\nProvide pronunciation feedback."}
|
35 |
]
|
36 |
)
|
37 |
+
return response.choices[0].message['content']
|
|
|
|
|
38 |
except Exception as e:
|
39 |
logger.error(f"Error in get_pronunciation_feedback: {str(e)}")
|
|
|
40 |
return "Error generating feedback. Please try again."
|
41 |
|
42 |
def transcribe_audio_realtime(audio):
    """Transcribe a recorded audio file to text via Google speech recognition.

    Args:
        audio: Audio file to transcribe; it is opened with ``sr.AudioFile``
            (the UI's audio input is configured with ``type="filepath"``).
            A falsy value is treated as "nothing was recorded".

    Returns:
        The recognized text on success. On failure a short user-facing
        message is returned instead of raising, so the result can always be
        shown in the transcription textbox.
    """
    # Presumably the UI passes None when the user submits without recording;
    # answer that directly instead of letting it surface as an unexpected
    # error in the catch-all handler below.
    if not audio:
        return "No audio received. Please record your voice and try again."

    try:
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio) as source:
            audio_data = recognizer.record(source)
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        # The recognizer could not make out any speech in the recording.
        return "Could not understand audio"
    except sr.RequestError as e:
        logger.error(
            "Could not request results from the speech recognition service; %s", e
        )
        return "Error in speech recognition service"
    except Exception as e:
        # logger.exception keeps the traceback, which is needed to diagnose
        # unexpected failures (bad file format, missing file, ...).
        logger.exception("Error in transcribe_audio_realtime: %s", e)
        return "Error transcribing audio. Please try again."
|
56 |
|
57 |
def practice_pronunciation(audio, text_to_read):
    """Run one practice round: choose the text, transcribe, request feedback.

    Args:
        audio: Recording forwarded to ``transcribe_audio_realtime``. A falsy
            value is treated as "nothing was recorded".
        text_to_read: Text the user was asked to read. When empty, a new
            text is obtained from ``generate_text()``.

    Returns:
        A ``(text_to_read, transcription, feedback)`` tuple, in the order of
        the three output components wired to the submit button.
    """
    if not text_to_read:
        text_to_read = generate_text()

    # Without a recording there is nothing to transcribe or to give feedback
    # on, so skip both service calls.
    if not audio:
        return (
            text_to_read,
            "",
            "No audio recorded. Please record your voice and try again.",
        )

    transcription = transcribe_audio_realtime(audio)

    # transcribe_audio_realtime reports failures as plain messages instead of
    # raising. Those messages must not be sent on as if they were the user's
    # speech: the feedback would be meaningless and the API call wasted.
    transcription_failures = (
        "Could not understand audio",
        "Error in speech recognition service",
        "Error transcribing audio. Please try again.",
    )
    if transcription in transcription_failures:
        return (
            text_to_read,
            transcription,
            "Transcription failed, so no pronunciation feedback could be "
            "generated. Please try again.",
        )

    feedback = get_pronunciation_feedback(text_to_read, transcription)
    return text_to_read, transcription, feedback
|
63 |
|
64 |
+
# Custom CSS for improved styling
custom_css = """
.container {max-width: 800px; margin: auto; padding: 20px;}
.title {text-align: center; color: #2c3e50; margin-bottom: 20px;}
.subtitle {text-align: center; color: #34495e; margin-bottom: 30px;}
.input-section, .output-section {background-color: #ecf0f1; padding: 20px; border-radius: 10px; margin-bottom: 20px;}
.input-section h3, .output-section h3 {color: #2980b9; margin-bottom: 10px;}
.button-primary {background-color: #3498db !important;}
.button-secondary {background-color: #2ecc71 !important;}
"""

# Gradio interface with improved UI
with gr.Blocks(css=custom_css) as demo:
    # Each gr.HTML call is its own component, so an opening <div> in one and
    # a closing </div> in another cannot enclose the components in between.
    # A Column carrying the "container" class wraps the whole layout instead.
    with gr.Column(elem_classes="container"):
        gr.HTML("<h1 class='title'>Pronunciation Practice Tool</h1>")
        gr.HTML("<p class='subtitle'>Improve your English pronunciation with AI-powered feedback</p>")

        # CSS classes are attached with `elem_classes`; `className` is not a
        # Gradio component parameter. gr.Group replaces the deprecated gr.Box.
        with gr.Group(elem_classes="input-section"):
            gr.HTML("<h3>Step 1: Get Text to Read</h3>")
            with gr.Row():
                text_to_read = gr.Textbox(
                    label="Text to Read",
                    placeholder="Click 'Generate New Text' or type your own text here",
                )
                generate_button = gr.Button(
                    "Generate New Text",
                    variant="primary",
                    elem_classes="button-primary",
                )

        with gr.Group(elem_classes="input-section"):
            gr.HTML("<h3>Step 2: Record Your Voice</h3>")
            audio_input = gr.Audio(
                type="filepath",
                label="Record your voice reading the text above",
            )

        with gr.Group(elem_classes="output-section"):
            gr.HTML("<h3>Step 3: Get Feedback</h3>")
            with gr.Row():
                transcription_output = gr.Textbox(label="Your Transcription", lines=3)
                feedback_output = gr.Textbox(label="Pronunciation Feedback", lines=5)

        submit_button = gr.Button(
            "Submit for Feedback",
            variant="secondary",
            elem_classes="button-secondary",
        )

    # Event listeners must be registered inside the Blocks context.
    generate_button.click(generate_text, outputs=text_to_read)
    submit_button.click(
        practice_pronunciation,
        inputs=[audio_input, text_to_read],
        outputs=[text_to_read, transcription_output, feedback_output],
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()
|