import os
import re

import streamlit as st
import spacy
from PyPDF2 import PdfReader
from docx import Document
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline, AutoTokenizer, T5ForConditionalGeneration
from groq import Groq

# Groq client for interview-question generation; expects GROQ_API_KEY in the environment.
client = Groq(api_key=os.environ["GROQ_API_KEY"])

# Load the spaCy English model, downloading it on first run if necessary.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    from spacy.cli import download
    download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Sentence embedding model for resume/JD semantic similarity.
similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
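# Note: all-MiniLM-L6-v2 truncates input at 256 word-piece tokens by default,
# so very long documents are compared mostly on their opening text; chunking
# and averaging embeddings is one workaround if that becomes a problem.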

# Local T5 text2text pipeline. It is never called by the flow below, which
# uses the Groq API instead, but it can serve as an offline fallback.
tokenizer = AutoTokenizer.from_pretrained("t5-base", use_fast=False)
model = T5ForConditionalGeneration.from_pretrained("t5-base")
question_generator = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    framework="pt"
)
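# Hypothetical local usage (t5-base is a generic model, not fine-tuned for
# question generation, so expect rougher output than the Groq path):
#   question_generator("generate questions: " + resume_text, max_length=64)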

def extract_text(file):
    """Extract raw text from PDF, DOCX, or TXT uploads."""
    if file.name.endswith('.pdf'):
        reader = PdfReader(file)
        # extract_text() can return None for image-only pages, hence the `or ""`.
        return "\n".join(page.extract_text() or "" for page in reader.pages)
    elif file.name.endswith('.docx'):
        doc = Document(file)
        # Join with newlines so analyze_sections() can split the text back into lines.
        return "\n".join(para.text for para in doc.paragraphs)
    elif file.name.endswith('.txt'):
        return file.read().decode()
    return ""
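# Note: PyPDF2 only reads embedded text layers; scanned/image-only PDFs will
# yield an empty string here and would need an OCR step (e.g. pytesseract).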

def extract_contact_info(text):
    """Extract phone numbers and emails using regex"""
    phone_pattern = r'\+?\d{1,3}[-.\s]?\(?\d{2,4}\)?[-.\s]?\d{3,4}[-.\s]?\d{4}'
    email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'

    phones = re.findall(phone_pattern, text)
    emails = re.findall(email_pattern, text)

    return {
        'phone': ', '.join(phones) if phones else 'Not found',
        'email': ', '.join(emails) if emails else 'Not found'
    }
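# Example (hypothetical input):
#   extract_contact_info("Call +1-555-123-4567 or mail jane@example.com")
#   -> {'phone': '+1-555-123-4567', 'email': 'jane@example.com'}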

def extract_name(text):
    """Extract the candidate's name using spaCy NER."""
    # The name almost always appears near the top of a resume, so scanning
    # only the first chunk keeps this fast on long documents.
    doc = nlp(text[:1000])
    for ent in doc.ents:
        if ent.label_ == 'PERSON':
            return ent.text
    return "Not found"
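# Caveat: the small spaCy model can tag headers or company names as PERSON,
# so the first PERSON entity is a heuristic rather than a guarantee.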

def analyze_sections(text):
    """Parse resume sections using a rule-based keyword approach."""
    sections = {
        'experience': [],
        'skills': [],
        'education': [],
        'certifications': []
    }

    current_section = None
    section_keywords = {
        'experience': ['experience', 'work history', 'employment'],
        'skills': ['skills', 'competencies', 'technologies'],
        'education': ['education', 'academic background'],
        'certifications': ['certifications', 'licenses', 'courses']
    }

    for line in text.split('\n'):
        line_lower = line.strip().lower()

        # Treat any line containing a section keyword as a header that switches
        # the current section; the for/else body runs only when no keyword matched.
        for section, keywords in section_keywords.items():
            if any(keyword in line_lower for keyword in keywords):
                current_section = section
                break
        else:
            if current_section and line.strip():
                sections[current_section].append(line.strip())

    return {k: '\n'.join(v) if v else 'Not found' for k, v in sections.items()}
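# Example: a line reading "Skills" switches current_section to 'skills', and
# the following non-header lines ("Python", "SQL", ...) are collected under it.
# A body line that merely mentions a keyword (e.g. "...10 years of experience
# in...") is also treated as a header, which is the main limitation here.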

def calculate_similarity(resume_text, jd_text):
    """Score resume-vs-JD semantic similarity as a percentage."""
    embeddings = similarity_model.encode([resume_text, jd_text])
    # Cosine similarity lands in [-1, 1], so this score is scaled to roughly
    # [-100, 100]; real resume/JD pairs score well above zero in practice.
    return util.pytorch_cos_sim(embeddings[0], embeddings[1]).item() * 100

def generate_interview_questions(resume_text, jd_text):
    """Generate interview questions using the Groq API."""
    input_text = (
        "Generate interview questions based on the resume and job description. "
        f"Here is the resume: {resume_text}\n"
        f"Here is the job description: {jd_text}\n"
        "Give me concise, to-the-point questions only, not a description of "
        "the resume or the job description."
    )

    response = client.chat.completions.create(
        messages=[
            {"role": "user", "content": input_text}
        ],
        model="deepseek-r1-distill-llama-70b"
    )
    content = response.choices[0].message.content
    # DeepSeek-R1-style models may prepend chain-of-thought wrapped in <think>
    # tags; strip it if present so only the questions are shown.
    return re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL).strip()
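# Note: hosted model names on Groq rotate over time; if the model above is
# retired, swap in any currently available chat model via the `model` argument.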

st.set_page_config(page_title="AI Resume Analyzer", layout="wide")

st.title("AI-Powered Resume Analyzer 🧠")
st.markdown("""
Upload a candidate's resume and paste the job description to get:
- Candidate profile analysis
- Job requirement matching
- Automated interview questions
""")

with st.container():
    col1, col2 = st.columns([2, 3])

    with col1:
        uploaded_file = st.file_uploader(
            "Upload Resume (PDF/DOCX/TXT)",
            type=['pdf', 'docx', 'txt'],
            help="Supported formats: PDF, Word, Text"
        )

    with col2:
        jd_input = st.text_area(
            "Paste Job Description",
            height=200,
            placeholder="Paste the complete job description here..."
        )

if st.button("Process Resume"):
    if uploaded_file and jd_input:
        resume_text = extract_text(uploaded_file)

        if resume_text:
            st.header("👤 Candidate Profile")
            profile_col1, profile_col2 = st.columns([1, 2])

            with profile_col1:
                st.subheader("Basic Information")
                name = extract_name(resume_text)
                contact = extract_contact_info(resume_text)

                st.markdown(f"""
                **Name:** {name}
                **Phone:** {contact['phone']}
                **Email:** {contact['email']}
                """)

            with profile_col2:
                st.subheader("Professional Summary")
                sections = analyze_sections(resume_text)

                exp_col, edu_col = st.columns(2)
                with exp_col:
                    with st.expander("Work Experience"):
                        st.write(sections['experience'])

                with edu_col:
                    with st.expander("Education"):
                        st.write(sections['education'])

                skills_col, cert_col = st.columns(2)
                with skills_col:
                    with st.expander("Skills"):
                        st.write(sections['skills'])

                with cert_col:
                    with st.expander("Certifications"):
                        st.write(sections['certifications'])

            st.header("📊 Job Compatibility Analysis")
            match_score = calculate_similarity(resume_text, jd_input)

            col1, col2 = st.columns([1, 3])
            with col1:
                st.metric("Match Percentage", f"{match_score:.1f}%")

            with col2:
                # st.progress expects a value in [0, 1]; clamp in case the
                # cosine similarity ever comes back negative.
                st.progress(min(max(match_score, 0.0), 100.0) / 100)
                st.caption("Semantic similarity score between resume content and job description")

            st.header("❓ Suggested Interview Questions")
            questions = generate_interview_questions(resume_text, jd_input)

            if questions:
                st.write(questions)
            else:
                st.warning("Could not generate questions. Please try with more detailed inputs.")

        else:
            st.error("Could not extract any text from the uploaded file.")
    else:
        st.info("📄 Please upload a resume and enter a job description to begin analysis")

st.markdown("---")
st.markdown("Built with ♥ using [Streamlit](https://streamlit.io) | [Hugging Face](https://huggingface.co) | [spaCy](https://spacy.io) | Groq AI")