Talha812's picture
Update app.py
463776a verified
import streamlit as st
import re
import json
from PyPDF2 import PdfReader
from docx import Document
import spacy
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline, AutoTokenizer, T5ForConditionalGeneration
import os
from groq import Groq
# Initialize Groq API client
client = Groq(api_key=os.environ["GROQ_API_KEY"])
# Initialize NLP components
try:
nlp = spacy.load("en_core_web_sm")
except OSError:
from spacy.cli import download
download("en_core_web_sm")
nlp = spacy.load("en_core_web_sm")
# st.error("Please install the SpaCy English model: 'python -m spacy download en_core_web_sm'")
# st.stop()
# Initialize models
similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
# Initialize T5 question generator with proper tokenizer
tokenizer = AutoTokenizer.from_pretrained("t5-base", use_fast=False)
model = T5ForConditionalGeneration.from_pretrained("t5-base")
question_generator = pipeline(
"text2text-generation",
model=model,
tokenizer=tokenizer,
framework="pt"
)
def extract_text(file):
"""Extract text from various file formats"""
if file.name.endswith('.pdf'):
reader = PdfReader(file)
return " ".join([page.extract_text() for page in reader.pages])
elif file.name.endswith('.docx'):
doc = Document(file)
return " ".join([para.text for para in doc.paragraphs])
elif file.name.endswith('.txt'):
return file.read().decode()
return ""
def extract_contact_info(text):
"""Extract phone numbers and emails using regex"""
phone_pattern = r'\+?\d{1,3}[-.\s]?\(?\d{2,4}\)?[-.\s]?\d{3,4}[-.\s]?\d{4}'
email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
phones = re.findall(phone_pattern, text)
emails = re.findall(email_pattern, text)
return {
'phone': ', '.join(phones) if phones else 'Not found',
'email': ', '.join(emails) if emails else 'Not found'
}
def extract_name(text):
"""Extract candidate name using SpaCy NER"""
doc = nlp(text)
for ent in doc.ents:
if ent.label_ == 'PERSON':
return ent.text
return "Not found"
def analyze_sections(text):
"""Parse resume sections using rule-based approach"""
sections = {
'experience': [],
'skills': [],
'education': [],
'certifications': []
}
current_section = None
section_keywords = {
'experience': ['experience', 'work history', 'employment'],
'skills': ['skills', 'competencies', 'technologies'],
'education': ['education', 'academic background'],
'certifications': ['certifications', 'licenses', 'courses']
}
for line in text.split('\n'):
line_lower = line.strip().lower()
# Detect section headers
for section, keywords in section_keywords.items():
if any(keyword in line_lower for keyword in keywords):
current_section = section
break
else:
if current_section and line.strip():
sections[current_section].append(line.strip())
return {k: '\n'.join(v) if v else 'Not found' for k, v in sections.items()}
def calculate_similarity(resume_text, jd_text):
"""Calculate semantic similarity between resume and JD"""
embeddings = similarity_model.encode([resume_text, jd_text])
return util.pytorch_cos_sim(embeddings[0], embeddings[1]).item() * 100
def generate_interview_questions(resume_text, jd_text):
"""Generate interview questions using Groq API"""
input_text = f"Generate interview questions based on the resume and job description.Here is the resume: {resume_text}\n and here is the Job Description:{jd_text} Give me concise to the point questions only. Not description of resume or Job Description."
response = client.chat.completions.create(
messages=[
{"role": "user", "content": input_text}
],
model="deepseek-r1-distill-llama-70b"
)
return response.choices[0].message.content
# Streamlit UI Configuration
st.set_page_config(page_title="AI Resume Analyzer", layout="wide")
# Main Application
st.title("AI-Powered Resume Analyzer 🧠")
st.markdown("""
Upload a candidate's resume and paste the job description to get:
- Candidate profile analysis
- Job requirement matching
- Automated interview questions
""")
# File Upload and JD Input
with st.container():
col1, col2 = st.columns([2, 3])
with col1:
uploaded_file = st.file_uploader(
"Upload Resume (PDF/DOCX/TXT)",
type=['pdf', 'docx', 'txt'],
help="Supported formats: PDF, Word, Text"
)
with col2:
jd_input = st.text_area(
"Paste Job Description",
height=200,
placeholder="Paste the complete job description here..."
)
if st.button("Process Resume"):
if uploaded_file and jd_input:
resume_text = extract_text(uploaded_file)
if resume_text:
# Candidate Profile Section
st.header("πŸ‘€ Candidate Profile")
profile_col1, profile_col2 = st.columns([1, 2])
with profile_col1:
st.subheader("Basic Information")
name = extract_name(resume_text)
contact = extract_contact_info(resume_text)
st.markdown(f"""
**Name:** {name}
**Phone:** {contact['phone']}
**Email:** {contact['email']}
""")
with profile_col2:
st.subheader("Professional Summary")
sections = analyze_sections(resume_text)
exp_col, edu_col = st.columns(2)
with exp_col:
with st.expander("Work Experience"):
st.write(sections['experience'])
with edu_col:
with st.expander("Education"):
st.write(sections['education'])
skills_col, cert_col = st.columns(2)
with skills_col:
with st.expander("Skills"):
st.write(sections['skills'])
with cert_col:
with st.expander("Certifications"):
st.write(sections['certifications'])
# Job Matching Analysis
st.header("πŸ“Š Job Compatibility Analysis")
match_score = calculate_similarity(resume_text, jd_input)
col1, col2 = st.columns([1, 3])
with col1:
st.metric("Match Percentage", f"{match_score:.1f}%")
with col2:
st.progress(match_score/100)
st.caption("Semantic similarity score between resume content and job description")
# Interview Questions
st.header("❓ Suggested Interview Questions")
questions = generate_interview_questions(resume_text, jd_input)
if questions:
st.write(questions)
# cleaned_questions = questions.replace("\\n", "\n").split("\n")
# for i, q in enumerate(cleaned_questions[:5]):
# st.markdown(f"{i+1}. {q.strip()}")
else:
st.warning("Could not generate questions. Please try with more detailed inputs.")
else:
st.info("πŸ‘† Please upload a resume and enter a job description to begin analysis")
# Footer
st.markdown("---")
st.markdown("Built with β™₯ using [Streamlit](https://streamlit.io) | [Hugging Face](https://huggingface.co) | [Spacy](https://spacy.io) | FAISS | Groq AI")