import streamlit as st
import re
import os
from PyPDF2 import PdfReader
from docx import Document
import spacy
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline, AutoTokenizer, T5ForConditionalGeneration
from groq import Groq

# Initialize Groq API client (requires the GROQ_API_KEY environment variable)
client = Groq(api_key=os.environ["GROQ_API_KEY"])

# Initialize NLP components; download the SpaCy English model on first run if missing
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    from spacy.cli import download
    download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Initialize the sentence-embedding model used for resume/JD matching.
# Note: in a long-running Streamlit app these model loads could be wrapped in
# @st.cache_resource so they are not repeated on every rerun.
similarity_model = SentenceTransformer('all-MiniLM-L6-v2')

# Initialize T5 question generator with the slow (SentencePiece) tokenizer
# (currently unused: interview questions are generated via the Groq API below)
tokenizer = AutoTokenizer.from_pretrained("t5-base", use_fast=False)
model = T5ForConditionalGeneration.from_pretrained("t5-base")
question_generator = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    framework="pt"
)


def extract_text(file):
    """Extract text from PDF, DOCX, or TXT uploads."""
    if file.name.endswith('.pdf'):
        reader = PdfReader(file)
        # extract_text() can return None for image-only pages, so guard with `or ""`
        return " ".join([(page.extract_text() or "") for page in reader.pages])
    elif file.name.endswith('.docx'):
        doc = Document(file)
        return " ".join([para.text for para in doc.paragraphs])
    elif file.name.endswith('.txt'):
        return file.read().decode()
    return ""


def extract_contact_info(text):
    """Extract phone numbers and emails using regex."""
    phone_pattern = r'\+?\d{1,3}[-.\s]?\(?\d{2,4}\)?[-.\s]?\d{3,4}[-.\s]?\d{4}'
    email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    phones = re.findall(phone_pattern, text)
    emails = re.findall(email_pattern, text)
    return {
        'phone': ', '.join(phones) if phones else 'Not found',
        'email': ', '.join(emails) if emails else 'Not found'
    }


def extract_name(text):
    """Extract the candidate name using SpaCy NER (first PERSON entity found)."""
    doc = nlp(text)
    for ent in doc.ents:
        if ent.label_ == 'PERSON':
            return ent.text
    return "Not found"


def analyze_sections(text):
    """Parse resume sections using a rule-based keyword approach."""
    sections = {
        'experience': [],
        'skills': [],
        'education': [],
        'certifications': []
    }
    current_section = None
    section_keywords = {
        'experience': ['experience', 'work history', 'employment'],
        'skills': ['skills', 'competencies', 'technologies'],
        'education': ['education', 'academic background'],
        'certifications': ['certifications', 'licenses', 'courses']
    }

    for line in text.split('\n'):
        line_lower = line.strip().lower()
        # Detect section headers
        for section, keywords in section_keywords.items():
            if any(keyword in line_lower for keyword in keywords):
                current_section = section
                break
        else:
            # for-else: no header matched, so treat the line as section content
            if current_section and line.strip():
                sections[current_section].append(line.strip())

    return {k: '\n'.join(v) if v else 'Not found' for k, v in sections.items()}


def calculate_similarity(resume_text, jd_text):
    """Calculate semantic similarity between resume and JD as a percentage."""
    embeddings = similarity_model.encode([resume_text, jd_text], convert_to_tensor=True)
    return util.pytorch_cos_sim(embeddings[0], embeddings[1]).item() * 100


def generate_interview_questions(resume_text, jd_text):
    """Generate interview questions using the Groq API."""
    input_text = (
        "Generate interview questions based on the following resume and job description.\n"
        f"Resume: {resume_text}\n"
        f"Job description: {jd_text}\n"
        "Return concise, to-the-point questions only; do not describe the resume "
        "or the job description."
    )
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": input_text}],
        model="deepseek-r1-distill-llama-70b"
    )
    return response.choices[0].message.content


# Streamlit UI configuration
st.set_page_config(page_title="AI Resume Analyzer", layout="wide")

# Main application
st.title("AI-Powered Resume Analyzer 🧠")
st.markdown("""
Upload a candidate's resume and paste the job description to get:
- Candidate profile analysis
- Job requirement matching
- Automated interview questions
""")

# File upload and JD input
with st.container():
    col1, col2 = st.columns([2, 3])
    with col1:
        uploaded_file = st.file_uploader(
            "Upload Resume (PDF/DOCX/TXT)",
            type=['pdf', 'docx', 'txt'],
            help="Supported formats: PDF, Word, Text"
        )
    with col2:
        jd_input = st.text_area(
            "Paste Job Description",
            height=200,
            placeholder="Paste the complete job description here..."
        )

if st.button("Process Resume"):
    if uploaded_file and jd_input:
        resume_text = extract_text(uploaded_file)
        if resume_text:
            # Candidate profile section
            st.header("👤 Candidate Profile")
            profile_col1, profile_col2 = st.columns([1, 2])

            with profile_col1:
                st.subheader("Basic Information")
                name = extract_name(resume_text)
                contact = extract_contact_info(resume_text)
                st.markdown(f"""
                **Name:** {name}
                **Phone:** {contact['phone']}
                **Email:** {contact['email']}
                """)

            with profile_col2:
                st.subheader("Professional Summary")
                sections = analyze_sections(resume_text)

                exp_col, edu_col = st.columns(2)
                with exp_col:
                    with st.expander("Work Experience"):
                        st.write(sections['experience'])
                with edu_col:
                    with st.expander("Education"):
                        st.write(sections['education'])

                skills_col, cert_col = st.columns(2)
                with skills_col:
                    with st.expander("Skills"):
                        st.write(sections['skills'])
                with cert_col:
                    with st.expander("Certifications"):
                        st.write(sections['certifications'])

            # Job matching analysis
            st.header("📊 Job Compatibility Analysis")
            match_score = calculate_similarity(resume_text, jd_input)

            col1, col2 = st.columns([1, 3])
            with col1:
                st.metric("Match Percentage", f"{match_score:.1f}%")
            with col2:
                # Cosine similarity can be slightly negative, so clamp to [0, 1]
                st.progress(max(0.0, min(match_score / 100, 1.0)))
                st.caption("Semantic similarity score between resume content and job description")

            # Interview questions
            st.header("❓ Suggested Interview Questions")
            questions = generate_interview_questions(resume_text, jd_input)
            if questions:
                st.write(questions)
            else:
                st.warning("Could not generate questions. Please try with more detailed inputs.")
        else:
            st.error("Could not extract any text from the uploaded file.")
    else:
        st.info("👆 Please upload a resume and enter a job description to begin analysis")

# Footer
st.markdown("---")
st.markdown("Built with ♥ using [Streamlit](https://streamlit.io) | [Hugging Face](https://huggingface.co) | [SpaCy](https://spacy.io) | Groq AI")
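# ---------------------------------------------------------------------------
# Running the app: a minimal sketch, assuming the script is saved as app.py
# (the filename and the exact package list are assumptions inferred from the
# imports above, not part of the original script).
#
#   pip install streamlit PyPDF2 python-docx spacy sentence-transformers \
#       transformers torch sentencepiece groq
#   export GROQ_API_KEY="your-key-here"   # required by the Groq client above
#   streamlit run app.py
# ---------------------------------------------------------------------------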