import torch
import json
import re
import textstat
import language_tool_python
import os
import gradio as gr
from transformers import pipeline, AutoProcessor, AutoModelForSpeechSeq2Seq
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from openai import OpenAI

# OpenAI client (reads OPENAI_API_KEY from the environment)
openai = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# LanguageTool public API (remote, rate-limited)
tool = language_tool_python.LanguageToolPublicAPI('en-US')

# Load Whisper model (CPU-compatible)
AUDIO_MODEL = "openai/whisper-base"
speech_model = AutoModelForSpeechSeq2Seq.from_pretrained(
    AUDIO_MODEL, torch_dtype=torch.float32, low_cpu_mem_usage=True
)
processor = AutoProcessor.from_pretrained(AUDIO_MODEL)

# Create ASR pipeline (runs on CPU)
pipe = pipeline(
    "automatic-speech-recognition",
    model=speech_model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch.float32,
    device="cpu",
)

# Transcribe audio to text
def transcribe_audio(audio_path):
    result = pipe(audio_path, return_timestamps=True)
    return result["text"]

# Split a transcript into sentences on ., ! or ?
def split_into_sentences(text):
    return re.split(r'(?<=[.!?])\s+', text)

# Fluency: penalize grammar mistakes and a high reading-grade level, clamped to 0-10
def analyze_fluency(text):
    matches = tool.check(text)
    grammar_mistakes = len(matches)
    readability = textstat.flesch_kincaid_grade(text)
    fluency_score = max(0, min(10, 10 - (grammar_mistakes * 0.5) - (readability * 0.3)))
    return round(fluency_score, 1)

# Vocabulary: type-token ratio scaled to 0-10
def analyze_vocabulary(text):
    words = text.split()
    unique_words = set(words)
    diversity = len(unique_words) / len(words) if words else 0
    vocab_score = round(diversity * 10, 2)
    return min(vocab_score, 10)

# Grammar: penalize each LanguageTool match
def analyze_grammar(text):
    matches = tool.check(text)
    grammar_score = max(0, 10 - len(matches) * 0.5)
    return round(grammar_score, 1)

# Relevance: TF-IDF cosine similarity between the text and the topic
def analyze_relevance(text, topic):
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([text, topic])
    similarity = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]
    relevance_score = similarity * 10
    return round(relevance_score, 1)

def evaluate_overall_gpt(transcript, topic):
    """Uses the OpenAI Chat Completions API (gpt-4o-mini) to score the full transcript."""
    response = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are an AI that evaluates speech transcripts. ONLY return JSON, no explanations."},
            {
                "role": "user",
                "content": f"""
Analyze the following transcript and score it based on:
- **Fluency** (0-10)
- **Vocabulary richness** (0-10)
- **Grammar correctness** (0-10)
- **Relevance to the topic '{topic}'** (0-10)

### **STRICT INSTRUCTIONS:**
Return **ONLY** a valid JSON object, no extra text.
The expected format:
```json
{{
    "fluency": 8.5,
    "vocabulary": 7.2,
    "grammar": 9.0,
    "relevance": 6.8,
    "overall": 7.9
}}
```

### Transcript:
{transcript}
"""
            }
        ],
        response_format={"type": "json_object"}
    )

    response_text = response.choices[0].message.content
    if not response_text:
        print("GPT returned an empty response!")
        return {"error": "No response from OpenAI"}

    try:
        return json.loads(response_text)
    except json.JSONDecodeError:
        print("GPT returned an invalid JSON response!")
        return {"error": "Invalid JSON response from OpenAI"}

# Combine Whisper transcription, sentence-level ML scores, and the GPT overall score
def analyze_audio_ml_gpt(audio_path, topic):
    full_transcript = transcribe_audio(audio_path)
    sentences = split_into_sentences(full_transcript)

    sentence_scores = [
        {
            "text": sentence,
            "fluency": analyze_fluency(sentence),
            "vocabulary": analyze_vocabulary(sentence),
            "grammar": analyze_grammar(sentence),
            "relevance": analyze_relevance(sentence, topic),
        }
        for sentence in sentences
    ]

    overall_scores = evaluate_overall_gpt(full_transcript, topic)
    return overall_scores, sentence_scores

# Gradio UI
with gr.Blocks() as app:
    gr.Markdown("## 🎙️ AI-Powered Speech Analysis")
    gr.Markdown("Upload an audio file and enter a topic to analyze fluency, vocabulary, grammar, and relevance.")

    with gr.Row():
        audio_input = gr.File(label="Upload Audio File")
        topic_input = gr.Textbox(label="Enter Topic")

    analyze_button = gr.Button("Analyze Speech")
    output_overall = gr.JSON(label="Overall Scores (GPT)")
    output_sentences = gr.JSON(label="Sentence-Level Scores (ML)")

    analyze_button.click(
        fn=analyze_audio_ml_gpt,
        inputs=[audio_input, topic_input],
        outputs=[output_overall, output_sentences],
    )

# Launch Gradio app
app.launch(share=True)
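
# Example (sketch): programmatic use without the Gradio UI.
# "sample.wav" and the topic string below are placeholders, not assets that ship with
# this script. Because app.launch() above blocks until the server is stopped, comment
# out the launch call (or run this in a separate script) to score a file directly:
#
#   overall, per_sentence = analyze_audio_ml_gpt("sample.wav", "climate change")
#   print(json.dumps(overall, indent=2))
#   for s in per_sentence:
#       print(s["fluency"], s["vocabulary"], s["grammar"], s["relevance"], "-", s["text"])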