Spaces:

Talha812
/

AI-Powered-RecruitmentAssistant

Running

App Files Files Community

AI-Powered-RecruitmentAssistant / app.py

Talha812

Update app.py

463776a verified 6 months ago

raw

history blame contribute delete

7.89 kB

	import streamlit as st
	import re
	import json
	from PyPDF2 import PdfReader
	from docx import Document
	import spacy
	from sentence_transformers import SentenceTransformer, util
	from transformers import pipeline, AutoTokenizer, T5ForConditionalGeneration
	import os
	from groq import Groq

	# --- Groq client ------------------------------------------------------------
	# The API key is read from the environment; a missing GROQ_API_KEY raises
	# KeyError right here, at import time.
	client = Groq(api_key=os.environ["GROQ_API_KEY"])

	# --- spaCy pipeline ---------------------------------------------------------
	# Load the small English model; if spaCy reports it missing (OSError),
	# download it once and load it again.
	try:
	    nlp = spacy.load("en_core_web_sm")
	except OSError:
	    from spacy.cli import download

	    download("en_core_web_sm")
	    nlp = spacy.load("en_core_web_sm")

	# --- Embedding model --------------------------------------------------------
	# Sentence-embedding model used to score resume / job-description similarity.
	similarity_model = SentenceTransformer('all-MiniLM-L6-v2')

	# --- T5 text2text pipeline --------------------------------------------------
	# Built with the slow (non-fast) tokenizer and the PyTorch backend.
	# NOTE(review): nothing visible in this file calls `question_generator`
	# (questions come from Groq instead) -- confirm before removing the load.
	tokenizer = AutoTokenizer.from_pretrained("t5-base", use_fast=False)
	model = T5ForConditionalGeneration.from_pretrained("t5-base")
	question_generator = pipeline(
	    "text2text-generation", model=model, tokenizer=tokenizer, framework="pt"
	)

	def extract_text(file):
	    """Extract plain text from an uploaded resume file.

	    The format is chosen from the file name's extension, compared
	    case-insensitively so that e.g. ``Resume.PDF`` is handled like
	    ``resume.pdf``.

	    Args:
	        file: File-like object with a ``name`` attribute. PDF and DOCX files
	            are handed to ``PdfReader`` / ``Document``; for ``.txt`` files
	            ``file.read()`` must return bytes.

	    Returns:
	        The extracted text with pages / paragraphs separated by newlines,
	        or ``""`` when the extension is not supported.
	    """
	    filename = file.name.lower()

	    if filename.endswith('.pdf'):
	        reader = PdfReader(file)
	        # Guard against pages that yield no text (None or empty), and keep
	        # page boundaries as line breaks for line-based parsing downstream.
	        return "\n".join(page.extract_text() or "" for page in reader.pages)

	    if filename.endswith('.docx'):
	        doc = Document(file)
	        # One paragraph per line so section headers stay on their own line.
	        return "\n".join(para.text for para in doc.paragraphs)

	    if filename.endswith('.txt'):
	        # Replace undecodable bytes instead of crashing on non-UTF-8 files.
	        return file.read().decode("utf-8", errors="replace")

	    return ""

	def extract_contact_info(text):
	    """Find phone numbers and e-mail addresses in free text via regex.

	    Returns:
	        A dict with the keys ``'phone'`` and ``'email'``. Each value is the
	        comma-separated list of matches in order of appearance, or
	        ``'Not found'`` when the pattern matched nothing.
	    """
	    patterns = {
	        'phone': r'\+?\d{1,3}[-.\s]?\(?\d{2,4}\)?[-.\s]?\d{3,4}[-.\s]?\d{4}',
	        'email': r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}',
	    }

	    contact = {}
	    for field, pattern in patterns.items():
	        matches = re.findall(pattern, text)
	        # Every match is a non-empty string, so an empty join means no match.
	        contact[field] = ', '.join(matches) or 'Not found'
	    return contact


	def extract_name(text):
	    """Return the first PERSON entity spaCy finds in *text*.

	    Falls back to the string "Not found" when the document contains no
	    entity labelled PERSON.
	    """
	    entities = nlp(text).ents
	    return next(
	        (entity.text for entity in entities if entity.label_ == 'PERSON'),
	        "Not found",
	    )

	def analyze_sections(text, max_header_words=4):
	    """Split resume text into sections using keyword-based header detection.

	    The text is scanned line by line. A line that contains one of the
	    section keywords AND is short enough to be a heading switches the
	    current section; every other non-empty line is appended to the section
	    currently open. Lines before the first heading are ignored.

	    Args:
	        text: Resume text with one logical line per text line.
	        max_header_words: Maximum number of whitespace-separated words a
	            line may have to be treated as a section heading. Without this
	            limit a content line such as "5 years of experience in ..."
	            would be mistaken for a heading and dropped.

	    Returns:
	        A dict with the keys 'experience', 'skills', 'education' and
	        'certifications'; each value is the section's lines joined with
	        newlines, or 'Not found' if the section is empty.
	    """
	    section_keywords = {
	        'experience': ['experience', 'work history', 'employment'],
	        'skills': ['skills', 'competencies', 'technologies'],
	        'education': ['education', 'academic background'],
	        'certifications': ['certifications', 'licenses', 'courses']
	    }
	    sections = {section: [] for section in section_keywords}

	    def match_header(line_lower):
	        """Return the section a heading-like line introduces, else None."""
	        if len(line_lower.split()) > max_header_words:
	            return None
	        for section, keywords in section_keywords.items():
	            if any(keyword in line_lower for keyword in keywords):
	                return section
	        return None

	    current_section = None
	    for raw_line in text.splitlines():
	        line = raw_line.strip()
	        if not line:
	            continue

	        header = match_header(line.lower())
	        if header is not None:
	            # The heading itself is not part of the section content.
	            current_section = header
	        elif current_section:
	            sections[current_section].append(line)

	    return {k: '\n'.join(v) if v else 'Not found' for k, v in sections.items()}

	def calculate_similarity(resume_text, jd_text):
	    """Score how similar a resume is to a job description, as a percentage.

	    Both texts are embedded with the module-level ``similarity_model`` and
	    compared by cosine similarity.

	    Args:
	        resume_text: Plain text of the resume.
	        jd_text: Plain text of the job description.

	    Returns:
	        A float in the range [0.0, 100.0].
	    """
	    embeddings = similarity_model.encode([resume_text, jd_text])
	    score = util.pytorch_cos_sim(embeddings[0], embeddings[1]).item() * 100

	    # Cosine similarity can be negative, and floating-point rounding can push
	    # it marginally above 1; clamp so the result is always a valid percentage
	    # (the UI divides it by 100 and feeds it to a progress bar).
	    return max(0.0, min(100.0, score))

	def generate_interview_questions(resume_text, jd_text):
	    """Ask the Groq chat API for interview questions for this candidate.

	    A single user message containing the resume and the job description is
	    sent through the module-level ``client``.

	    Args:
	        resume_text: Plain text of the resume.
	        jd_text: Plain text of the job description.

	    Returns:
	        The model's answer as a string with any ``<think>...</think>``
	        reasoning block removed, or ``""`` when the API returned no content.
	        Errors raised by the Groq client are not caught here.
	    """
	    input_text = (
	        "Generate interview questions based on the resume and job description."
	        f"Here is the resume: {resume_text}\n and here is the Job Description:{jd_text}"
	        " Give me concise to the point questions only."
	        " Not description of resume or Job Description."
	    )

	    response = client.chat.completions.create(
	        messages=[{"role": "user", "content": input_text}],
	        model="deepseek-r1-distill-llama-70b",
	    )

	    # `content` may be None; normalise so the caller's truthiness check works.
	    content = response.choices[0].message.content or ""

	    # R1-style reasoning models can embed their chain of thought in
	    # <think>...</think> tags inside the answer; the user should only see
	    # the questions themselves.
	    return re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL).strip()

	# Page-level settings: browser-tab title and wide layout.
	st.set_page_config(page_title="AI Resume Analyzer", layout="wide")

	# Heading plus a short description of what the app produces.
	st.title("AI-Powered Resume Analyzer 🧠")
	intro_markdown = """
	Upload a candidate's resume and paste the job description to get:
	- Candidate profile analysis
	- Job requirement matching
	- Automated interview questions
	"""
	st.markdown(intro_markdown)

	# Input row: resume upload in the narrower left column, job description in
	# the wider right column (2:3 split).
	with st.container():
	    upload_col, jd_col = st.columns([2, 3])

	    uploaded_file = upload_col.file_uploader(
	        "Upload Resume (PDF/DOCX/TXT)",
	        type=['pdf', 'docx', 'txt'],
	        help="Supported formats: PDF, Word, Text",
	    )

	    jd_input = jd_col.text_area(
	        "Paste Job Description",
	        height=200,
	        placeholder="Paste the complete job description here...",
	    )

	# Everything below runs only on the script rerun triggered by the button.
	if st.button("Process Resume"):
	    if uploaded_file and jd_input:
	        resume_text = extract_text(uploaded_file)

	        # Whitespace-only output (e.g. a scanned PDF without a text layer)
	        # counts as "nothing extracted".
	        if resume_text and resume_text.strip():
	            # ---- Candidate profile ------------------------------------
	            st.header("👤 Candidate Profile")
	            profile_col1, profile_col2 = st.columns([1, 2])

	            with profile_col1:
	                st.subheader("Basic Information")
	                name = extract_name(resume_text)
	                contact = extract_contact_info(resume_text)

	                st.markdown(f"""
	Name: {name}
	Phone: {contact['phone']}
	Email: {contact['email']}
	""")

	            with profile_col2:
	                st.subheader("Professional Summary")
	                sections = analyze_sections(resume_text)

	                exp_col, edu_col = st.columns(2)
	                with exp_col, st.expander("Work Experience"):
	                    st.write(sections['experience'])
	                with edu_col, st.expander("Education"):
	                    st.write(sections['education'])

	                skills_col, cert_col = st.columns(2)
	                with skills_col, st.expander("Skills"):
	                    st.write(sections['skills'])
	                with cert_col, st.expander("Certifications"):
	                    st.write(sections['certifications'])

	            # ---- Job matching -----------------------------------------
	            st.header("📊 Job Compatibility Analysis")
	            match_score = calculate_similarity(resume_text, jd_input)

	            col1, col2 = st.columns([1, 3])
	            with col1:
	                st.metric("Match Percentage", f"{match_score:.1f}%")

	            with col2:
	                # st.progress rejects floats outside [0.0, 1.0]; a cosine
	                # score can be negative, so clamp before displaying.
	                st.progress(min(max(match_score / 100, 0.0), 1.0))
	                st.caption("Semantic similarity score between resume content and job description")

	            # ---- Interview questions ----------------------------------
	            st.header("❓ Suggested Interview Questions")
	            questions = generate_interview_questions(resume_text, jd_input)

	            if questions:
	                st.write(questions)
	            else:
	                st.warning("Could not generate questions. Please try with more detailed inputs.")
	        else:
	            # Previously the app rendered nothing at all in this case.
	            st.error("Could not extract any text from the uploaded file. Please upload a text-based PDF, DOCX or TXT resume.")

	    else:
	        st.info("👆 Please upload a resume and enter a job description to begin analysis")

	# Footer: horizontal rule followed by credits.
	st.markdown("---")
	# A plain "|" is used as separator: "\|" inside a normal string literal is an
	# invalid escape sequence, and the rendered Markdown is the same either way.
	st.markdown("Built with ♥ using [Streamlit](https://streamlit.io) | [Hugging Face](https://huggingface.co) | [Spacy](https://spacy.io) | FAISS | Groq AI")