import streamlit as st
import re
from PyPDF2 import PdfReader
from docx import Document
import spacy
from sentence_transformers import SentenceTransformer, util
import os
from groq import Groq

# Initialize Groq API client
client = Groq(api_key=os.environ["GROQ_API_KEY"])
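# Assumes GROQ_API_KEY is set in the environment (e.g. `export GROQ_API_KEY=...`);
# os.environ[...] raises KeyError at startup if the variable is missing.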

# Load the SpaCy English model, downloading it on first run if it is missing
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    from spacy.cli import download
    download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Load the sentence-embedding model once; st.cache_resource keeps it in memory
# across Streamlit reruns, which re-execute this script from the top.
@st.cache_resource
def load_similarity_model():
    return SentenceTransformer('all-MiniLM-L6-v2')

similarity_model = load_similarity_model()

def extract_text(file):
    """Extract text from various file formats"""
    if file.name.endswith('.pdf'):
        reader = PdfReader(file)
        # extract_text() can return None for image-only pages; fall back to ""
        return " ".join([(page.extract_text() or "") for page in reader.pages])
    elif file.name.endswith('.docx'):
        doc = Document(file)
        return " ".join([para.text for para in doc.paragraphs])
    elif file.name.endswith('.txt'):
        return file.read().decode("utf-8", errors="ignore")
    return ""

def extract_contact_info(text):
    """Extract phone numbers and emails using regex"""
    phone_pattern = r'\+?\d{1,3}[-.\s]?\(?\d{2,4}\)?[-.\s]?\d{3,4}[-.\s]?\d{4}'
    email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    
    phones = re.findall(phone_pattern, text)
    emails = re.findall(email_pattern, text)
    
    return {
        'phone': ', '.join(phones) if phones else 'Not found',
        'email': ', '.join(emails) if emails else 'Not found'
    }
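# Illustrative check of the patterns above (hypothetical values):
#   extract_contact_info("Call +1-555-123-4567 or write to [email protected]")
#   -> {'phone': '+1-555-123-4567', 'email': '[email protected]'}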


def extract_name(text):
    """Extract candidate name using SpaCy NER"""
    doc = nlp(text)
    for ent in doc.ents:
        if ent.label_ == 'PERSON':
            return ent.text
    return "Not found"

def analyze_sections(text):
    """Parse resume sections using rule-based approach"""
    sections = {
        'experience': [],
        'skills': [],
        'education': [],
        'certifications': []
    }
    
    current_section = None
    section_keywords = {
        'experience': ['experience', 'work history', 'employment'],
        'skills': ['skills', 'competencies', 'technologies'],
        'education': ['education', 'academic background'],
        'certifications': ['certifications', 'licenses', 'courses']
    }
    
    for line in text.split('\n'):
        line_lower = line.strip().lower()
        
        # Detect section headers
        for section, keywords in section_keywords.items():
            if any(keyword in line_lower for keyword in keywords):
                current_section = section
                break
        else:
            if current_section and line.strip():
                sections[current_section].append(line.strip())
    
    return {k: '\n'.join(v) if v else 'Not found' for k, v in sections.items()}
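# Illustrative behavior on a hypothetical resume snippet:
#   analyze_sections("Skills\nPython, SQL\nEducation\nB.Sc. Computer Science")
#   -> {'experience': 'Not found', 'skills': 'Python, SQL',
#       'education': 'B.Sc. Computer Science', 'certifications': 'Not found'}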

def calculate_similarity(resume_text, jd_text):
    """Calculate semantic similarity between resume and JD"""
    embeddings = similarity_model.encode([resume_text, jd_text])
    return util.pytorch_cos_sim(embeddings[0], embeddings[1]).item() * 100
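# Cosine similarity of the two embeddings lies in [-1, 1]; real resume/JD
# pairs are almost always positive, so multiplying by 100 gives a
# percentage-like score (e.g. a cosine of 0.62 is reported as 62.0).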

def generate_interview_questions(resume_text, jd_text):
    """Generate interview questions using the Groq API"""
    input_text = (
        "Generate interview questions based on the following resume and job description.\n"
        f"Resume: {resume_text}\n"
        f"Job description: {jd_text}\n"
        "Return concise, to-the-point questions only; do not describe the resume "
        "or the job description."
    )

    response = client.chat.completions.create(
        messages=[
            {"role": "user", "content": input_text}
        ],
        model="deepseek-r1-distill-llama-70b"
    )
    content = response.choices[0].message.content
    # DeepSeek-R1 distill models typically wrap their reasoning in
    # <think>...</think> tags; strip that so only the questions remain.
    return re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL).strip()
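# The model name above must match one of Groq's currently hosted models; if it
# is ever retired, any Groq chat model (e.g. "llama-3.3-70b-versatile") works here.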

# Streamlit UI Configuration
st.set_page_config(page_title="AI Resume Analyzer", layout="wide")

# Main Application
st.title("AI-Powered Resume Analyzer 🧠")
st.markdown("""
    Upload a candidate's resume and paste the job description to get:
    - Candidate profile analysis
    - Job requirement matching
    - Automated interview questions
""")

# File Upload and JD Input
with st.container():
    col1, col2 = st.columns([2, 3])
    
    with col1:
        uploaded_file = st.file_uploader(
            "Upload Resume (PDF/DOCX/TXT)",
            type=['pdf', 'docx', 'txt'],
            help="Supported formats: PDF, Word, Text"
        )
    
    with col2:
        jd_input = st.text_area(
            "Paste Job Description",
            height=200,
            placeholder="Paste the complete job description here..."
        )

if st.button("Process Resume"):
    if uploaded_file and jd_input:
        resume_text = extract_text(uploaded_file)
        
        if resume_text:
            # Candidate Profile Section
            st.header("👤 Candidate Profile")
            profile_col1, profile_col2 = st.columns([1, 2])
            
            with profile_col1:
                st.subheader("Basic Information")
                name = extract_name(resume_text)
                contact = extract_contact_info(resume_text)
                
                st.markdown(f"""
                    **Name:** {name}  
                    **Phone:** {contact['phone']}  
                    **Email:** {contact['email']}
                """)
            
            with profile_col2:
                st.subheader("Professional Summary")
                sections = analyze_sections(resume_text)
                
                exp_col, edu_col = st.columns(2)
                with exp_col:
                    with st.expander("Work Experience"):
                        st.write(sections['experience'])
                
                with edu_col:
                    with st.expander("Education"):
                        st.write(sections['education'])
                
                skills_col, cert_col = st.columns(2)
                with skills_col:
                    with st.expander("Skills"):
                        st.write(sections['skills'])
                
                with cert_col:
                    with st.expander("Certifications"):
                        st.write(sections['certifications'])
    
            # Job Matching Analysis
            st.header("📊 Job Compatibility Analysis")
            match_score = calculate_similarity(resume_text, jd_input)
            
            col1, col2 = st.columns([1, 3])
            with col1:
                st.metric("Match Percentage", f"{match_score:.1f}%")
            
            with col2:
                st.progress(max(0.0, min(match_score / 100, 1.0)))
                st.caption("Semantic similarity score between resume content and job description")
    
            # Interview Questions
            st.header("❓ Suggested Interview Questions")
            questions = generate_interview_questions(resume_text, jd_input)
            
            if questions:
                st.write(questions)
            else:
                st.warning("Could not generate questions. Please try with more detailed inputs.")
    
    else:
        st.info("👆 Please upload a resume and enter a job description to begin analysis")

# Footer
st.markdown("---")
st.markdown("Built with β™₯ using [Streamlit](https://streamlit.io) | [Hugging Face](https://huggingface.co) | [Spacy](https://spacy.io) | FAISS | Groq AI")