import torch
import json
import re
import textstat
import language_tool_python
import os
import gradio as gr
from transformers import pipeline, AutoProcessor, AutoModelForSpeechSeq2Seq
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from openai import OpenAI

# OpenAI client (reads OPENAI_API_KEY from the environment)
openai = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# LanguageTool public API (remote, rate-limited)
tool = language_tool_python.LanguageToolPublicAPI('en-US')

# Load Whisper model (CPU-compatible)
AUDIO_MODEL = "openai/whisper-base"
speech_model = AutoModelForSpeechSeq2Seq.from_pretrained(
    AUDIO_MODEL, torch_dtype=torch.float32, low_cpu_mem_usage=True
)
processor = AutoProcessor.from_pretrained(AUDIO_MODEL)

# Create ASR pipeline (runs on CPU)
pipe = pipeline(
    "automatic-speech-recognition",
    model=speech_model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch.float32,
    device="cpu",
)

# Transcribe audio to text
def transcribe_audio(audio_path):
    result = pipe(audio_path, return_timestamps=True)
    return result["text"]

# Split a transcript into sentences on ., ! or ?
def split_into_sentences(text):
    return re.split(r'(?<=[.!?])\s+', text)

# Fluency: penalize grammar mistakes and a high reading-grade level, clamped to 0-10
def analyze_fluency(text):
    matches = tool.check(text)
    grammar_mistakes = len(matches)
    readability = textstat.flesch_kincaid_grade(text)
    fluency_score = max(0, min(10, 10 - (grammar_mistakes * 0.5) - (readability * 0.3)))
    return round(fluency_score, 1)

# Vocabulary: type-token ratio scaled to 0-10
def analyze_vocabulary(text):
    words = text.split()
    unique_words = set(words)
    diversity = len(unique_words) / len(words) if words else 0
    vocab_score = round(diversity * 10, 2)
    return min(vocab_score, 10)

# Grammar: penalize each LanguageTool match
def analyze_grammar(text):
    matches = tool.check(text)
    grammar_score = max(0, 10 - len(matches) * 0.5)
    return round(grammar_score, 1)

# Relevance: TF-IDF cosine similarity between the text and the topic
def analyze_relevance(text, topic):
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([text, topic])
    similarity = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]
    relevance_score = similarity * 10
    return round(relevance_score, 1)

def evaluate_overall_gpt(transcript, topic):
    """Uses the OpenAI Chat Completions API (gpt-4o-mini) to score the full transcript."""
    response = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are an AI that evaluates speech transcripts. ONLY return JSON, no explanations."},
            {
                "role": "user",
                "content": f"""
Analyze the following transcript and score it based on:
- **Fluency** (0-10)
- **Vocabulary richness** (0-10)
- **Grammar correctness** (0-10)
- **Relevance to the topic '{topic}'** (0-10)

### **STRICT INSTRUCTIONS:**
Return **ONLY** a valid JSON object, no extra text.
The expected format:
```json
{{
    "fluency": 8.5,
    "vocabulary": 7.2,
    "grammar": 9.0,
    "relevance": 6.8,
    "overall": 7.9
}}
```

### Transcript:
{transcript}
"""
            }
        ],
        response_format={"type": "json_object"}
    )

    response_text = response.choices[0].message.content
    if not response_text:
        print("GPT returned an empty response!")
        return {"error": "No response from OpenAI"}

    try:
        return json.loads(response_text)
    except json.JSONDecodeError:
        print("GPT returned an invalid JSON response!")
        return {"error": "Invalid JSON response from OpenAI"}

# Combine Whisper transcription, sentence-level ML scores, and the GPT overall score
def analyze_audio_ml_gpt(audio_path, topic):
    full_transcript = transcribe_audio(audio_path)
    sentences = split_into_sentences(full_transcript)

    sentence_scores = [
        {
            "text": sentence,
            "fluency": analyze_fluency(sentence),
            "vocabulary": analyze_vocabulary(sentence),
            "grammar": analyze_grammar(sentence),
            "relevance": analyze_relevance(sentence, topic),
        }
        for sentence in sentences
    ]

    overall_scores = evaluate_overall_gpt(full_transcript, topic)
    return overall_scores, sentence_scores

# Gradio UI
with gr.Blocks() as app:
    gr.Markdown("## 🎙️ AI-Powered Speech Analysis")
    gr.Markdown("Upload an audio file and enter a topic to analyze fluency, vocabulary, grammar, and relevance.")

    with gr.Row():
        audio_input = gr.File(label="Upload Audio File")
        topic_input = gr.Textbox(label="Enter Topic")

    analyze_button = gr.Button("Analyze Speech")
    output_overall = gr.JSON(label="Overall Scores (GPT)")
    output_sentences = gr.JSON(label="Sentence-Level Scores (ML)")

    analyze_button.click(
        fn=analyze_audio_ml_gpt,
        inputs=[audio_input, topic_input],
        outputs=[output_overall, output_sentences],
    )

# Launch Gradio app
app.launch(share=True)
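
# Example (sketch): programmatic use without the Gradio UI.
# "sample.wav" and the topic string below are placeholders, not assets that ship with
# this script. Because app.launch() above blocks until the server is stopped, comment
# out the launch call (or run this in a separate script) to score a file directly:
#
#   overall, per_sentence = analyze_audio_ml_gpt("sample.wav", "climate change")
#   print(json.dumps(overall, indent=2))
#   for s in per_sentence:
#       print(s["fluency"], s["vocabulary"], s["grammar"], s["relevance"], "-", s["text"])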