"""Gradio app: transcribe speech with Whisper and give pronunciation feedback."""

import difflib  # To compare expected vs actual pronunciation
import string
from typing import List, Optional, Tuple

import gradio as gr
import whisper

# Load the Whisper model once at import time so every request reuses it.
model = whisper.load_model("base")


def _normalize_word(word: str) -> str:
    """Return *word* case-folded with leading/trailing ASCII punctuation removed."""
    return word.strip(string.punctuation).casefold()


def pronunciation_feedback(transcription: str, reference_text: str) -> str:
    """Give basic pronunciation feedback by diffing transcription and reference.

    Both texts are split on whitespace and compared word by word, ignoring
    letter case and leading/trailing ASCII punctuation, so that the
    capitalization and punctuation added by the transcriber are not reported
    as pronunciation errors.

    Args:
        transcription: Text produced by the speech recognizer.
        reference_text: Text the speaker was expected to say.

    Returns:
        A message listing the reference words that are missing from (or
        replaced in) the transcription, a success message when every
        reference word was matched, or a prompt when the reference text
        contains no words.
    """
    # Keep the words as typed (for the message) alongside their normalized
    # form (for the comparison). Tokens that are pure punctuation are dropped.
    reference_words = [w for w in reference_text.split() if _normalize_word(w)]
    if not reference_words:
        # Nothing to compare against: praising the speaker would be misleading.
        return "Please enter the expected text to get pronunciation feedback."

    reference_keys = [_normalize_word(w) for w in reference_words]
    heard_keys = [key for key in map(_normalize_word, transcription.split()) if key]

    # autojunk=False: never let the matcher discard frequent words as "junk"
    # on long texts; every reference word must be accounted for.
    matcher = difflib.SequenceMatcher(a=reference_keys, b=heard_keys, autojunk=False)

    missed: List[str] = []
    for tag, ref_start, ref_end, _, _ in matcher.get_opcodes():
        # "delete" and "replace" are the spans of the reference that have no
        # matching words in the transcription.
        if tag in ("delete", "replace"):
            missed.extend(
                word.strip(string.punctuation)
                for word in reference_words[ref_start:ref_end]
            )

    if missed:
        return "You mispronounced the following words: " + ", ".join(missed)
    return "Great job! Your pronunciation is spot on."


def transcribe_and_feedback(
    audio: Optional[str], reference_text: Optional[str]
) -> Tuple[str, str]:
    """Transcribe audio with Whisper and provide pronunciation feedback.

    Args:
        audio: Path to the recorded audio file, or ``None``/empty when nothing
            has been recorded yet (the live interface can call this function
            before a recording exists).
        reference_text: Text the speaker was expected to say.

    Returns:
        A ``(transcription, feedback)`` tuple of strings.
    """
    if not audio:
        # Without this guard the live interface would pass None to Whisper
        # and fail whenever the text box changes before a recording is made.
        return "", "Please record some audio to get feedback."

    # Transcribe the audio using Whisper
    result = model.transcribe(audio)
    transcription = result['text'].strip()

    # Provide basic pronunciation feedback
    feedback = pronunciation_feedback(transcription, reference_text or "")
    return transcription, feedback


# Create the Gradio interface for real-time transcription and feedback.
# NOTE(review): `source="microphone"` looks like the older Gradio keyword;
# newer releases appear to expect `sources=["microphone"]` -- confirm against
# the installed Gradio version.
interface = gr.Interface(
    fn=transcribe_and_feedback,  # Function to transcribe and give feedback
    inputs=[
        gr.Audio(source="microphone", type="filepath"),
        gr.Textbox(label="Expected Text"),
    ],
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Pronunciation Feedback"),
    ],
    live=True,  # Enables real-time transcription
)

if __name__ == "__main__":
    # Launch the Gradio interface only when run as a script, so importing
    # this module does not start a server.
    interface.launch(share=True)