# Hugging Face Space (status at capture time: Running)
import io
import json
import os
import re
from collections import Counter
from datetime import date
from pathlib import Path
from typing import Any, Dict

import docx
import google.generativeai as genai
import gradio as gr
import numpy as np
import pandas as pd
import PyPDF2
import spacy
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Configure the Gemini client. The key is read from the GEMINI_API_KEY
# environment variable; os.environ.get() yields None when it is unset, in
# which case the failure surfaces later, on the first Gemini request.
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
class ATSScorer:
    """Score a resume against a job description.

    Two independent analyses are offered:

    * ``calculate_final_score`` - a deterministic, keyword/embedding based
      score made of eight weighted dimensions (see ``weights``).
    * ``analyze_cv`` - a free-form assessment produced by the Gemini API.

    All lookup tables are class attributes: they are static configuration,
    remain readable as ``self.<name>``, and keep the scoring logic usable
    without loading any model.
    """

    # Weight of each scoring dimension in the final score (sums to 1.0).
    weights = {
        'relevant_skills': 0.25,
        'work_experience': 0.20,
        'education': 0.10,
        'certifications': 0.07,
        'projects': 0.10,
        'keywords_match': 0.10,
        'tools_tech': 0.10,
        'soft_skills': 0.08,
    }

    # Known skills grouped by domain. A skill may appear in several groups.
    skill_categories = {
        'programming': [
            'python', 'java', 'javascript', 'c++', 'c#', 'go', 'rust', 'php',
            'ruby', 'kotlin', 'swift', 'typescript', 'dart',
        ],
        'data_science': [
            'machine learning', 'deep learning', 'data analysis', 'statistics',
            'pandas', 'numpy', 'tensorflow', 'pytorch', 'scikit-learn',
            'matplotlib', 'seaborn',
        ],
        'web_development': [
            'html', 'css', 'react', 'vue', 'angular', 'node.js', 'express',
            'django', 'flask', 'next.js', 'nuxt.js', 'svelte', 'bootstrap',
            'tailwind',
        ],
        'mobile_development': [
            'react native', 'flutter', 'android studio', 'ios', 'swift',
            'kotlin', 'xamarin', 'ionic', 'cordova', 'firebase',
        ],
        'cybersecurity': [
            'malware analysis', 'penetration testing',
            'vulnerability assessment', 'ida pro', 'ghidra', 'wireshark',
            'burp suite', 'metasploit', 'nmap', 'reverse engineering', 'oscp',
            'cissp', 'ceh', 'security', 'threat', 'exploit',
        ],
        'databases': [
            'sql', 'mysql', 'postgresql', 'mongodb', 'redis', 'elasticsearch',
            'oracle', 'sqlite', 'cassandra', 'dynamodb',
        ],
        'cloud': [
            'aws', 'azure', 'gcp', 'docker', 'kubernetes', 'terraform',
            'jenkins', 'ci/cd', 'devops', 'microservices',
        ],
        'ui_ux_design': [
            'figma', 'sketch', 'adobe xd', 'photoshop', 'illustrator',
            'wireframing', 'prototyping', 'user research', 'usability testing',
            'interaction design', 'visual design', 'design thinking',
            'user journey', 'persona', 'a/b testing',
        ],
        'business_analysis': [
            'business analysis', 'requirements gathering',
            'stakeholder management', 'process mapping', 'gap analysis',
            'user stories', 'acceptance criteria', 'brd', 'frd', 'visio',
            'lucidchart', 'jira', 'confluence', 'agile', 'scrum', 'waterfall',
        ],
        'marketing': [
            'digital marketing', 'content marketing', 'social media marketing',
            'seo', 'sem', 'ppc', 'google ads', 'facebook ads',
            'email marketing', 'marketing automation', 'analytics',
            'google analytics', 'hubspot', 'salesforce', 'brand management',
            'campaign management',
        ],
        'consultancy': [
            'strategic planning', 'business strategy', 'change management',
            'project management', 'stakeholder engagement',
            'process improvement', 'risk assessment', 'financial analysis',
            'market research', 'competitive analysis', 'presentation skills',
            'client management',
        ],
        'ai_ml_engineering': [
            'artificial intelligence', 'machine learning', 'deep learning',
            'neural networks', 'nlp', 'computer vision', 'tensorflow',
            'pytorch', 'keras', 'opencv', 'transformers', 'bert', 'gpt', 'llm',
            'mlops', 'model deployment', 'feature engineering',
            'hyperparameter tuning',
        ],
        'soft_skills': [
            'leadership', 'teamwork', 'communication', 'problem solving',
            'project management', 'collaboration', 'analytical', 'creative',
        ],
    }

    # Domain detection keywords. A hit is worth 3 / 2 / 1 points for
    # high / medium / low priority respectively (see detect_domain).
    domain_indicators = {
        'web_development': {
            'high_priority': [
                'web developer', 'frontend developer', 'backend developer',
                'full stack developer', 'full-stack developer',
                'web development', 'frontend development',
                'backend development', 'fullstack',
            ],
            'medium_priority': [
                'web', 'frontend', 'backend', 'full stack',
                'website development', 'web application development',
                'web app', 'spa', 'single page application',
            ],
            'low_priority': [
                'html', 'css', 'javascript', 'react', 'vue', 'angular',
                'node.js', 'express', 'django', 'flask', 'responsive design',
            ],
        },
        'ui_ux_design': {
            'high_priority': [
                'ui designer', 'ux designer', 'ui/ux designer',
                'product designer', 'user experience designer',
                'user interface designer', 'design lead', 'visual designer',
            ],
            'medium_priority': [
                'ui design', 'ux design', 'user experience', 'user interface',
                'interaction design', 'visual design', 'product design',
            ],
            'low_priority': [
                'figma', 'sketch', 'adobe xd', 'wireframing', 'prototyping',
                'user research', 'usability testing',
            ],
        },
        'mobile_development': {
            'high_priority': [
                'mobile developer', 'android developer', 'ios developer',
                'mobile app developer', 'app developer',
            ],
            'medium_priority': [
                'mobile', 'android', 'ios', 'app development',
                'mobile application', 'cross-platform',
            ],
            'low_priority': [
                'react native', 'flutter', 'swift', 'kotlin', 'xamarin',
            ],
        },
        'data_science': {
            'high_priority': [
                'data scientist', 'data analyst', 'machine learning engineer',
                'data engineer',
            ],
            'medium_priority': [
                'data science', 'machine learning', 'analytics',
                'data analysis', 'ai', 'artificial intelligence',
            ],
            'low_priority': [
                'python', 'pandas', 'numpy', 'tensorflow', 'pytorch',
            ],
        },
        'cybersecurity': {
            'high_priority': [
                'security analyst', 'cybersecurity specialist',
                'security engineer', 'penetration tester',
                'security researcher',
            ],
            'medium_priority': [
                'security', 'malware', 'vulnerability', 'penetration',
                'threat', 'exploit', 'cybersecurity', 'infosec',
                'reverse engineering',
            ],
            'low_priority': ['wireshark', 'burp suite', 'metasploit', 'nmap'],
        },
        'devops': {
            'high_priority': [
                'devops engineer', 'site reliability engineer',
                'infrastructure engineer', 'cloud engineer',
            ],
            'medium_priority': [
                'devops', 'infrastructure', 'deployment', 'ci/cd',
                'automation', 'cloud',
            ],
            'low_priority': ['docker', 'kubernetes', 'terraform', 'jenkins'],
        },
        'game_development': {
            'high_priority': [
                'game developer', 'game programmer', 'unity developer',
                'unreal developer',
            ],
            'medium_priority': [
                'game', 'unity', 'unreal', 'gaming', 'game development', '3d',
                'graphics',
            ],
            'low_priority': ['c#', 'c++', 'opengl', 'directx'],
        },
        'business_analysis': {
            'high_priority': [
                'business analyst', 'systems analyst', 'functional analyst',
                'requirements analyst',
            ],
            'medium_priority': [
                'business analysis', 'requirements', 'stakeholder', 'process',
                'analyst', 'functional requirements', 'business requirements',
            ],
            'low_priority': ['jira', 'confluence', 'visio', 'lucidchart'],
        },
        'marketing': {
            'high_priority': [
                'marketing manager', 'digital marketing specialist',
                'marketing analyst', 'content marketer',
            ],
            'medium_priority': [
                'marketing', 'digital marketing', 'content marketing',
                'social media', 'seo', 'brand', 'campaign', 'advertising',
                'promotion', 'market research',
            ],
            'low_priority': [
                'google ads', 'facebook ads', 'hubspot', 'salesforce',
            ],
        },
        'consultancy': {
            'high_priority': [
                'consultant', 'management consultant', 'strategy consultant',
                'business consultant',
            ],
            'medium_priority': [
                'consulting', 'advisory', 'strategy', 'strategic',
                'transformation', 'change management', 'business consulting',
                'management consulting',
            ],
            'low_priority': ['powerpoint', 'excel', 'presentation'],
        },
        'ai_ml_engineering': {
            'high_priority': [
                'ai engineer', 'ml engineer', 'machine learning engineer',
                'ai specialist', 'nlp engineer',
            ],
            'medium_priority': [
                'artificial intelligence', 'deep learning', 'neural networks',
                'nlp engineer', 'computer vision', 'mlops',
            ],
            'low_priority': ['tensorflow', 'pytorch', 'keras', 'opencv'],
        },
    }

    education_keywords = [
        'bachelor', 'master', 'phd', 'degree', 'university', 'college',
        'education', 'graduated',
    ]
    certification_keywords = [
        'certified', 'certification', 'certificate', 'licensed', 'accredited',
    ]
    project_keywords = [
        'project', 'developed', 'built', 'created', 'implemented', 'designed',
    ]

    # Extra education patterns so that current students are recognised too.
    education_patterns = {
        'undergraduate': [
            'undergraduate', 'pursuing', 'currently enrolled', 'final year',
            'third year', 'fourth year', 'sophomore', 'junior', 'senior',
        ],
        'year_indicators': [
            'first year', 'second year', 'third year', 'fourth year',
            'final year', 'sophomore', 'junior', 'senior',
        ],
        'degree_types': [
            'bachelor', 'bs', 'ba', 'btech', 'bsc', 'be', 'master', 'ms', 'ma',
            'mtech', 'msc', 'phd', 'doctorate', 'mba', 'bba', 'bfa', 'mfa',
        ],
    }

    # Soft skills inferred from hobbies, interests and activities.
    interest_skill_mapping = {
        'creativity': [
            'art', 'drawing', 'painting', 'design', 'photography', 'music',
            'writing', 'creative', 'sketch',
        ],
        'leadership': [
            'captain', 'president', 'head', 'leader', 'coordinator',
            'organizer', 'mentor', 'ncc', 'scouts',
        ],
        'teamwork': [
            'team', 'collaboration', 'group projects', 'sports', 'football',
            'basketball', 'cricket', 'volleyball',
        ],
        'dedication': [
            'marathon', 'athletics', 'gym', 'fitness', 'ncc', 'volunteer',
            'community service', 'consistent',
        ],
        'analytical': [
            'chess', 'puzzle', 'mathematics', 'strategy', 'analysis',
            'research', 'debate',
        ],
        'communication': [
            'debate', 'public speaking', 'presentation', 'writing', 'blog',
            'theater', 'drama',
        ],
        'adaptability': [
            'travel', 'different cultures', 'international', 'languages',
            'diverse',
        ],
        'persistence': [
            'marathon', 'long distance', 'endurance', 'consistent', 'regular',
            'discipline',
        ],
    }

    # Keywords used to classify the projects mentioned in a resume.
    project_categories = {
        'web_development': [
            'website', 'web app', 'web application', 'e-commerce', 'blog',
            'portfolio', 'dashboard', 'frontend', 'backend', 'full stack',
            'responsive', 'landing page', 'cms', 'online store',
            'booking system', 'social media', 'chat app', 'forum',
        ],
        'mobile_development': [
            'mobile app', 'android app', 'ios app', 'flutter app',
            'react native', 'mobile application', 'app development',
            'cross-platform', 'native app', 'hybrid app', 'mobile game',
        ],
        'data_science': [
            'machine learning', 'data analysis', 'prediction model',
            'recommendation system', 'data visualization', 'analytics',
            'ai model', 'neural network', 'classification', 'regression',
            'clustering', 'sentiment analysis', 'nlp', 'computer vision',
        ],
        'cybersecurity': [
            'security tool', 'vulnerability scanner', 'penetration testing',
            'malware analysis', 'encryption', 'security audit',
            'threat detection', 'firewall', 'intrusion detection',
            'security framework', 'ethical hacking', 'forensics',
        ],
        'game_development': [
            'game', 'unity', 'unreal', '2d game', '3d game', 'mobile game',
            'web game', 'game engine', 'graphics', 'animation', 'gameplay',
            'level design',
        ],
        'devops': [
            'ci/cd', 'deployment', 'automation', 'infrastructure',
            'monitoring', 'containerization', 'orchestration', 'pipeline',
            'cloud deployment', 'server management',
        ],
        'desktop_application': [
            'desktop app', 'gui application', 'desktop software',
            'system tool', 'utility', 'desktop game', 'productivity tool',
            'file manager', 'text editor',
        ],
        'api_backend': [
            'api', 'rest api', 'backend service', 'microservice',
            'web service', 'server', 'database integration',
            'authentication system', 'payment gateway',
        ],
        'ui_ux_design': [
            'ui design', 'ux design', 'user interface', 'user experience',
            'wireframe', 'prototype', 'mockup', 'design system',
            'user research', 'usability testing', 'interaction design',
            'visual design', 'app design', 'website design',
        ],
        'business_analysis': [
            'business analysis', 'requirements gathering', 'process mapping',
            'workflow design', 'business process', 'system analysis',
            'gap analysis', 'stakeholder analysis', 'business requirements',
            'functional requirements',
        ],
        'marketing': [
            'marketing campaign', 'digital marketing', 'social media campaign',
            'content strategy', 'seo optimization', 'brand campaign',
            'market research', 'customer analysis', 'marketing automation',
            'email campaign',
        ],
        'ai_ml_engineering': [
            'ai system', 'ml pipeline', 'deep learning model',
            'neural network', 'nlp system', 'computer vision',
            'recommendation engine', 'chatbot', 'ai application',
            'model deployment', 'mlops', 'feature engineering',
        ],
    }

    # Prompt sent to Gemini; filled in with str.format().
    _ANALYSIS_PROMPT = (
        "You are a smart and unbiased AI CV screening assistant. Your task is "
        "to evaluate how well a candidate's resume (CV) matches a job "
        "description. The job description may include one or more roles and "
        "may contain responsibilities, expectations, and skill requirements.\n"
        "Carefully review both the CV and the Job Description, and provide "
        "the output as a **valid JSON object** with the following keys:\n"
        "1. **reasoning** (string): Provide a concise but insightful "
        "explanation of how well the candidate matches the job requirements "
        "\u2014 mention key matching points like role alignment, experience, "
        "and relevant technologies.\n"
        "2. **skills_available** (array of 6 or fewer strings): List up to 6 "
        "skills or competencies from the CV that strongly align with the job "
        "description.\n"
        "3. **missing** (array of 6 or fewer strings): List up to 6 important "
        "skills, experiences, or qualifications the candidate lacks based on "
        "the job description. If nothing is missing, return a single string "
        'in the array: "You are good to go".\n'
        'CV:\n"""\n{cv_text}\n"""\n'
        'Job Description:\n"""\n{job_description}\n"""\n'
    )

    # "<n> years ..." phrasings; group 1 is the number of years.
    _EXPERIENCE_PATTERNS = (
        r'(\d+)\+?\s*years?\s*(?:of\s*)?(?:work\s*)?experience',
        r'experience\s*(?:of\s*)?(\d+)\+?\s*years?',
        r'(\d+)\+?\s*years?\s*(?:in|of|with)',
        r'over\s*(\d+)\s*years?',
        r'more\s*than\s*(\d+)\s*years?',
    )

    # "2015 - 2019", "2015 to 2019", "2018 - present". Restricted to
    # plausible calendar years so phone numbers are not read as ranges.
    _DATE_RANGE_PATTERN = (
        r'(?<!\d)((?:19|20)\d{2})\s*(?:-|to)\s*'
        r'((?:19|20)\d{2}|present|current|now)(?!\w)'
    )
    _OPEN_ENDED = frozenset({'present', 'current', 'now'})

    _CERT_PATTERNS = (
        r'certified\s+\w+',
        r'\w+\s+certification',
        r'\w+\s+certificate',
        r'licensed\s+\w+',
        r'accredited\s+\w+',
    )

    # Words ignored when measuring plain word overlap.
    _STOP_WORDS = frozenset({
        'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of',
        'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has',
        'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may',
        'might', 'can', 'must', 'shall', 'a', 'an', 'this', 'that', 'these',
        'those',
    })

    # Skill categories that count as "tools & technologies".
    _TECHNICAL_CATEGORIES = (
        'programming', 'databases', 'cloud', 'web_development',
        'mobile_development', 'data_science', 'cybersecurity',
        'ai_ml_engineering',
    )

    def __init__(self):
        """Load the sentence-embedding model and, if installed, spaCy."""
        print("Loading models...")
        self.sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
        # spaCy is optional: fall back to None when the model is missing.
        try:
            self.nlp = spacy.load("en_core_web_sm")
        except OSError:
            print("spaCy model not found. Install with: python -m spacy download en_core_web_sm")
            self.nlp = None

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _has_term(text, term):
        """Return True when *term* occurs in *text* as a whole word/phrase.

        A plain ``term in text`` test makes 'go' match 'google' and 'java'
        match 'javascript'. Look-arounds are used instead of ``\\b`` so that
        terms ending in punctuation ('c++', 'c#') still work.
        """
        pattern = r'(?<!\w)' + re.escape(term) + r'(?!\w)'
        return re.search(pattern, text) is not None

    @staticmethod
    def _extract_json_object(text):
        """Parse the outermost ``{...}`` span of *text* as JSON.

        Raises:
            ValueError: if no brace-delimited span exists or it is not valid
                JSON (``json.JSONDecodeError`` is a ``ValueError``).
        """
        start = text.find("{")
        end = text.rfind("}")
        if start == -1 or end <= start:
            raise ValueError("no JSON object found in response")
        return json.loads(text[start:end + 1])

    # ------------------------------------------------------------------
    # Gemini analysis
    # ------------------------------------------------------------------
    def analyze_cv(self, cv_text: str, job_description: str) -> Dict[str, Any]:
        """Analyze a CV against a job description using Gemini.

        Returns:
            ``{"success": True, "result": <parsed JSON object>}`` on success,
            otherwise ``{"success": False, "message": <reason>}``. This
            method never raises.
        """
        try:
            prompt = self._ANALYSIS_PROMPT.format(
                cv_text=cv_text, job_description=job_description
            )
            model = genai.GenerativeModel('gemini-2.0-flash-exp')
            response = model.generate_content(prompt)
            text = response.text
        except Exception as e:
            # Boundary with a remote service: report the failure to the UI.
            print(f'Error analyzing CV: {e}')
            return {"success": False, "message": f"Error: {str(e)}"}

        try:
            parsed_result = self._extract_json_object(text)
        except ValueError:
            return {"success": False, "message": "Could not parse JSON response"}
        return {"success": True, "result": parsed_result}

    def format_analysis_output(self, analysis_result: Dict[str, Any]) -> str:
        """Render the result of ``analyze_cv`` as Markdown for Gradio."""
        if not analysis_result.get("success"):
            return f"\u274c **Error:** {analysis_result.get('message', 'Unknown error')}"

        result = analysis_result["result"]
        lines = [
            "## \U0001F916 **AI-Powered CV Analysis**",
            "",
            "### \U0001F4DD **Analysis & Reasoning**",
            f"{result.get('reasoning', 'No reasoning provided')}",
            "",
            "### \u2705 **Matching Skills Found**",
        ]

        skills = result.get('skills_available', [])
        if isinstance(skills, str):
            # The model occasionally returns a bare string instead of a list.
            skills = [skills]
        if skills:
            lines.extend(f"\u2022 {skill}" for skill in skills)
        else:
            lines.append("\u2022 No matching skills identified")
        lines.append("")

        lines.append("### \u26a0\ufe0f **Areas for Improvement**")
        missing = result.get('missing', [])
        if isinstance(missing, str):
            missing = [missing]
        if not missing:
            lines.append("\u2022 No gaps identified")
        elif len(missing) == 1 and missing[0] == "You are good to go":
            lines.append("\U0001F389 **Excellent! You are good to go!**")
        else:
            lines.extend(f"\u2022 {item}" for item in missing)

        return "\n".join(lines) + "\n"

    # ------------------------------------------------------------------
    # File handling
    # ------------------------------------------------------------------
    def extract_text_from_pdf(self, pdf_file):
        """Extract text from a PDF file.

        Raises:
            Exception: if the PDF cannot be read.
        """
        try:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # extract_text() may yield None for pages without a text layer.
            pages = [(page.extract_text() or "") for page in pdf_reader.pages]
        except Exception as e:
            raise Exception(f"Error reading PDF: {str(e)}") from e
        return "\n".join(pages).strip()

    def extract_text_from_docx(self, docx_file):
        """Extract text from a DOCX file.

        Raises:
            Exception: if the document cannot be read.
        """
        try:
            doc = docx.Document(docx_file)
            paragraphs = [paragraph.text for paragraph in doc.paragraphs]
        except Exception as e:
            raise Exception(f"Error reading DOCX: {str(e)}") from e
        return "\n".join(paragraphs).strip()

    def extract_text_from_file(self, file):
        """Extract text from an uploaded file (PDF or DOCX).

        Args:
            file: a path (``str`` / ``os.PathLike``), an object exposing the
                path as ``.name`` (e.g. a temp-file wrapper), or ``None``.

        Returns:
            The extracted text, or ``""`` when *file* is ``None``.

        Raises:
            Exception: for unsupported extensions or unreadable files.
        """
        if file is None:
            return ""
        if not isinstance(file, (str, os.PathLike)):
            file = getattr(file, "name", file)
        file_extension = Path(file).suffix.lower()
        try:
            if file_extension == '.pdf':
                return self.extract_text_from_pdf(file)
            if file_extension in ('.docx', '.doc'):
                return self.extract_text_from_docx(file)
            raise Exception(f"Unsupported file format: {file_extension}. Please upload PDF or DOCX files.")
        except Exception as e:
            raise Exception(f"Error processing file: {str(e)}") from e

    # ------------------------------------------------------------------
    # Text analysis primitives
    # ------------------------------------------------------------------
    def preprocess_text(self, text):
        """Lower-case *text*, drop punctuation and normalise whitespace.

        ``- + # . /`` are kept because they are part of skill names such as
        'c++', 'c#', 'node.js' and 'ci/cd'.
        """
        text = text.lower()
        # Replace punctuation first, then collapse whitespace, so that a
        # removed character never leaves a double space behind.
        text = re.sub(r'[^\w\s\-\+\#\./]', ' ', text)
        text = re.sub(r'\s+', ' ', text)
        return text.strip()

    def extract_skills_from_text(self, text, domain=None):
        """Return the known skills mentioned in *text*.

        The result has no duplicates. Skills of *domain* (a key of
        ``skill_categories``) come first, followed by the remaining ones in
        category order.
        """
        text = self.preprocess_text(text)
        groups = list(self.skill_categories.values())
        if domain and domain in self.skill_categories:
            groups.insert(0, self.skill_categories[domain])

        found = {}  # used as an ordered set
        for skills in groups:
            for skill in skills:
                if skill not in found and self._has_term(text, skill):
                    found[skill] = None
        return list(found)

    def detect_domain(self, text):
        """Return the best matching key of ``domain_indicators`` or ``None``.

        Every keyword hit scores 3 / 2 / 1 points for high / medium / low
        priority. ``None`` is returned when nothing matches at all; ties go
        to the domain declared first.
        """
        text = self.preprocess_text(text)
        points = (('high_priority', 3), ('medium_priority', 2), ('low_priority', 1))

        best_domain, best_score = None, 0
        for domain, priorities in self.domain_indicators.items():
            score = sum(
                value
                for level, value in points
                for keyword in priorities[level]
                if self._has_term(text, keyword)
            )
            if score > best_score:
                best_domain, best_score = domain, score
        return best_domain

    def extract_experience_years(self, text):
        """Return the largest number of years of experience found in *text*.

        Both explicit statements ("5+ years of experience") and year ranges
        ("2015 - 2019", "2018 to present") are considered. Returns 0 when
        nothing is found.
        """
        # Map en/em dashes to '-' before preprocessing strips them.
        text = text.replace('\u2013', '-').replace('\u2014', '-')
        text = self.preprocess_text(text)

        years = []
        for pattern in self._EXPERIENCE_PATTERNS:
            years.extend(int(match) for match in re.findall(pattern, text))

        current_year = date.today().year
        for start, end in re.findall(self._DATE_RANGE_PATTERN, text):
            start_year = int(start)
            if end in self._OPEN_ENDED:
                end_year = current_year
            else:
                # A range ending in the future only counts up to today.
                end_year = min(int(end), current_year)
            if end_year > start_year:
                years.append(end_year - start_year)

        return max(years) if years else 0

    # ------------------------------------------------------------------
    # Dimension scores (each returns a number in 0..100)
    # ------------------------------------------------------------------
    def calculate_relevant_skills_score(self, job_description, resume):
        """Score the overlap between required and offered skills."""
        job_domain = self.detect_domain(job_description)
        job_skills = set(self.extract_skills_from_text(job_description, job_domain))
        if not job_skills:
            return 50  # nothing recognisable was asked for

        resume_skills = set(self.extract_skills_from_text(resume, job_domain))
        matching_skills = job_skills & resume_skills
        skill_match_ratio = len(matching_skills) / len(job_skills)

        # Up to 15 bonus points for matches inside the job's own domain.
        domain_bonus = 0
        if job_domain and job_domain in self.skill_categories:
            domain_skills = set(self.skill_categories[job_domain])
            domain_bonus = min(15, len(matching_skills & domain_skills) * 3)

        base_score = min(85, skill_match_ratio * 100)
        return min(100, base_score + domain_bonus)

    def calculate_work_experience_score(self, job_description, resume):
        """Score the candidate's years of experience against the job's."""
        job_experience = self.extract_experience_years(job_description)
        resume_experience = self.extract_experience_years(resume)

        experience_keywords = [
            'experience', 'worked', 'employed', 'position', 'role', 'job',
            'internship', 'intern',
        ]
        resume_lower = resume.lower()
        experience_mentions = sum(
            1 for keyword in experience_keywords if keyword in resume_lower
        )

        if job_experience == 0:
            # No explicit requirement: judge by how much experience is described.
            return min(80, 40 + experience_mentions * 8)
        if resume_experience >= job_experience:
            return min(100, 80 + (resume_experience - job_experience) * 2)
        if resume_experience >= job_experience * 0.7:
            return 70
        if resume_experience >= job_experience * 0.5:
            return 60
        return max(30, 30 + experience_mentions * 5)

    def calculate_education_score(self, job_description, resume):
        """Score the education section of the resume (minimum 40)."""
        resume_lower = resume.lower()

        # Whole-word match: abbreviations such as 'be' or 'ms' would
        # otherwise match inside unrelated words.
        has_degree = any(
            self._has_term(resume_lower, degree)
            for degree in self.education_patterns['degree_types']
        )
        degree_score = 20 if has_degree else 0

        education_mentions = sum(
            1 for keyword in self.education_keywords if keyword in resume_lower
        )
        education_score = min(30, education_mentions * 10)

        is_undergraduate = any(
            pattern in resume_lower
            for pattern in self.education_patterns['undergraduate']
        )
        undergraduate_score = 15 if is_undergraduate else 0

        mentions_year = any(
            year in resume_lower
            for year in self.education_patterns['year_indicators']
        )
        year_score = 10 if mentions_year else 0

        # Bonus when the resume names the job's domain, e.g. "data science".
        field_bonus = 0
        domain = self.detect_domain(job_description)
        if domain:
            domain_keywords = (domain.replace('_', ' '), domain.replace('_', ''))
            if any(keyword in resume_lower for keyword in domain_keywords):
                field_bonus = 20

        total_score = (
            degree_score + education_score + undergraduate_score
            + year_score + field_bonus
        )
        return min(100, max(40, total_score))

    def calculate_certifications_score(self, job_description, resume):
        """Score certifications mentioned in the resume (minimum 40)."""
        resume_lower = resume.lower()

        cert_mentions = sum(
            1 for keyword in self.certification_keywords if keyword in resume_lower
        )
        pattern_matches = sum(
            1 for pattern in self._CERT_PATTERNS if re.search(pattern, resume_lower)
        )
        if cert_mentions == 0 and pattern_matches == 0:
            return 40

        # Domain-specific certifications are worth 15 points each.
        domain = self.detect_domain(job_description)
        domain_certs = ()
        if domain == 'cybersecurity':
            domain_certs = ('cissp', 'ceh', 'oscp', 'comptia', 'security+')
        elif domain in ('devops', 'cloud'):
            # detect_domain reports cloud roles as 'devops'.
            domain_certs = ('aws', 'azure', 'gcp', 'cloud practitioner')
        domain_cert_bonus = 15 * sum(1 for cert in domain_certs if cert in resume_lower)

        base_score = min(60, cert_mentions * 15 + pattern_matches * 10)
        total_score = min(100, base_score + domain_cert_bonus)
        return max(40, total_score)

    def categorize_projects(self, project_text):
        """Return the ``project_categories`` keys that *project_text* touches."""
        project_text = self.preprocess_text(project_text)
        return [
            category
            for category, keywords in self.project_categories.items()
            if any(keyword in project_text for keyword in keywords)
        ]

    def calculate_projects_score(self, job_description, resume):
        """Score the projects described in the resume (30..100)."""
        resume_lower = resume.lower()

        project_mentions = sum(
            1 for keyword in self.project_keywords if keyword in resume_lower
        )
        project_section_indicators = [
            'projects', 'personal projects', 'academic projects', 'work projects',
        ]
        has_project_section = any(
            indicator in resume_lower for indicator in project_section_indicators
        )

        project_categories = self.categorize_projects(resume)
        job_domain = self.detect_domain(job_description)

        # Projects in the job's own domain are the most valuable.
        relevance_bonus = 25 if job_domain and job_domain in project_categories else 0
        base_score = min(50, project_mentions * 8)
        section_bonus = 20 if has_project_section else 0
        category_bonus = min(15, len(project_categories) * 3)

        total_score = base_score + section_bonus + category_bonus + relevance_bonus
        return min(100, max(30, total_score))

    def calculate_keywords_match_score(self, job_description, resume):
        """Score semantic similarity plus word overlap (30..100).

        Falls back to plain word overlap when the embedding model fails.
        """
        try:
            job_text = self.preprocess_text(job_description)
            resume_text = self.preprocess_text(resume)

            job_embedding = self.sentence_model.encode([job_text])
            resume_embedding = self.sentence_model.encode([resume_text])
            similarity = float(cosine_similarity(job_embedding, resume_embedding)[0][0])
            similarity_score = similarity * 100

            # Up to 20 bonus points for literal word overlap.
            job_words = set(job_text.split()) - self._STOP_WORDS
            resume_words = set(resume_text.split()) - self._STOP_WORDS
            if job_words:
                overlap_bonus = len(job_words & resume_words) / len(job_words) * 20
            else:
                overlap_bonus = 0

            return max(30, min(100, similarity_score + overlap_bonus))
        except Exception as e:
            # Deliberate best effort: scoring must not fail because the
            # embedding model is unavailable.
            print(f"Error in keyword matching: {e}")
            job_words = set(job_description.lower().split())
            resume_words = set(resume.lower().split())
            if job_words:
                overlap = len(job_words & resume_words) / len(job_words)
                return min(100, max(30, overlap * 100))
            return 50

    def calculate_tools_tech_score(self, job_description, resume):
        """Score the overlap of technical tools and technologies (40..100)."""
        technical_skills = set()
        for category in self._TECHNICAL_CATEGORIES:
            technical_skills.update(self.skill_categories.get(category, ()))

        # Sets, not lists: a skill listed in several categories must count
        # only once, otherwise a perfect match can never reach ratio 1.0.
        job_tech_skills = set(self.extract_skills_from_text(job_description)) & technical_skills
        if not job_tech_skills:
            return 60  # the job names no recognisable technology

        resume_tech_skills = set(self.extract_skills_from_text(resume)) & technical_skills
        matching_tools = job_tech_skills & resume_tech_skills
        tool_match_ratio = len(matching_tools) / len(job_tech_skills)

        # Small bonus for knowing more technologies than were asked for.
        surplus = max(0, len(resume_tech_skills) - len(job_tech_skills))
        extra_tools_bonus = min(15, surplus * 2)

        final_score = min(100, tool_match_ratio * 85 + extra_tools_bonus)
        return max(40, final_score)

    def infer_soft_skills(self, text):
        """Infer soft skills from interests and activities named in *text*."""
        text = self.preprocess_text(text)
        return [
            skill
            for skill, indicators in self.interest_skill_mapping.items()
            if any(indicator in text for indicator in indicators)
        ]

    def calculate_soft_skills_score(self, job_description, resume):
        """Score explicit and inferred soft skills (minimum 50)."""
        soft_skills = self.skill_categories['soft_skills']
        job_lower = job_description.lower()
        resume_lower = resume.lower()
        job_soft_skills = [skill for skill in soft_skills if skill in job_lower]
        resume_soft_skills = [skill for skill in soft_skills if skill in resume_lower]

        # Explicit mentions plus skills implied by hobbies and activities.
        all_resume_soft_skills = set(resume_soft_skills) | set(self.infer_soft_skills(resume))

        if not job_soft_skills:
            # Nothing specific requested: give credit for any soft skill.
            return min(80, 50 + len(all_resume_soft_skills) * 5)

        matching_soft_skills = set(job_soft_skills) & all_resume_soft_skills
        soft_skill_ratio = len(matching_soft_skills) / len(job_soft_skills)

        diversity_bonus = min(20, len(all_resume_soft_skills) * 3)
        final_score = min(100, soft_skill_ratio * 70 + diversity_bonus)
        return max(50, final_score)

    def calculate_final_score(self, job_description, resume):
        """Return ``(weighted_total, per_dimension_scores)``."""
        scorers = {
            'relevant_skills': self.calculate_relevant_skills_score,
            'work_experience': self.calculate_work_experience_score,
            'education': self.calculate_education_score,
            'certifications': self.calculate_certifications_score,
            'projects': self.calculate_projects_score,
            'keywords_match': self.calculate_keywords_match_score,
            'tools_tech': self.calculate_tools_tech_score,
            'soft_skills': self.calculate_soft_skills_score,
        }
        scores = {
            dimension: score_fn(job_description, resume)
            for dimension, score_fn in scorers.items()
        }
        final_score = sum(scores[dim] * self.weights[dim] for dim in scores)
        return final_score, scores
# Module-level scorer shared by the Gradio callbacks below. Constructing it
# calls ATSScorer.__init__, which loads the models once, at import time.
scorer = ATSScorer()
# (score key, label in the Markdown breakdown, label in the chart table)
_SCORE_DIMENSIONS = (
    ('relevant_skills', 'Relevant Skills', 'Relevant Skills'),
    ('work_experience', 'Work Experience', 'Work Experience'),
    ('education', 'Education', 'Education'),
    ('certifications', 'Certifications & Courses', 'Certifications'),
    ('projects', 'Projects', 'Projects'),
    ('keywords_match', 'Keywords Match', 'Keywords Match'),
    ('tools_tech', 'Tools & Technologies', 'Tools & Tech'),
    ('soft_skills', 'Soft Skills Indicators', 'Soft Skills'),
)
_CHART_COLUMNS = ['Dimension', 'Score', 'Weight (%)']


def _score_error(message):
    """Return the three-output tuple for a request that could not be scored."""
    # The third output feeds a gr.Dataframe, and Gradio reads a str value
    # there as a path to a CSV file. Returning "" therefore replaced the
    # message below with a file-read failure; an empty table is safe.
    return message, "", pd.DataFrame(columns=_CHART_COLUMNS)


def score_resume(job_description, resume_file, resume_text):
    """Score a resume against a job description and build the UI outputs.

    Args:
        job_description: Job description text; ``None`` or blank is rejected.
        resume_file: Path of an uploaded resume file, or ``None``. When given
            it takes precedence over ``resume_text``.
        resume_text: Pasted resume text, used only when no file is uploaded.

    Returns:
        A 3-tuple ``(ats_breakdown, ai_analysis, chart_data)``: the Markdown
        score breakdown, the formatted AI analysis, and a DataFrame with one
        row per scoring dimension. On any failure the first item is an error
        message, the second is ``""`` and the third is an empty DataFrame
        with the same columns.
    """
    # Inputs can be None (e.g. programmatic calls), so normalise before strip().
    if not (job_description or "").strip():
        return _score_error("Please provide a job description.")

    # Determine resume source: an uploaded file wins over pasted text.
    if resume_file is not None:
        try:
            resume_content = scorer.extract_text_from_file(resume_file)
        except Exception as e:
            return _score_error(f"Error processing file: {e}")
        if not (resume_content or "").strip():
            return _score_error(
                "Could not extract text from the uploaded file. Please check the file format."
            )
    elif (resume_text or "").strip():
        resume_content = resume_text.strip()
    else:
        return _score_error(
            "Please provide either a resume file (PDF/DOCX) or paste resume text."
        )

    # UI boundary: any scoring/analysis failure is reported to the user
    # rather than surfacing as an unhandled exception.
    try:
        final_score, dimension_scores = scorer.calculate_final_score(
            job_description, resume_content
        )

        analysis_result = scorer.analyze_cv(resume_content, job_description)
        ai_analysis = scorer.format_analysis_output(analysis_result)

        # Derive the displayed percentages from the weights actually used for
        # scoring. round() is needed because e.g. 0.07 * 100 is not exactly 7.
        weight_pct = {
            key: round(scorer.weights[key] * 100)
            for key, _, _ in _SCORE_DIMENSIONS
        }

        dimension_lines = "\n".join(
            f"- **{label}** ({weight_pct[key]}%): {dimension_scores[key]:.1f}/100"
            for key, label, _ in _SCORE_DIMENSIONS
        )
        # NOTE(review): the bands below leave 40-44 unlabelled ("45-55" is
        # followed by "Below 40") - confirm the intended thresholds.
        ats_breakdown = (
            "\n"
            f"## Overall ATS Score: {final_score:.1f}/100\n"
            "### Dimension Breakdown:\n"
            f"{dimension_lines}\n"
            "### Score Interpretation:\n"
            "- **90-100**: Excellent match\n"
            "- **76-89**: Very good match\n"
            "- **56-75**: Good match\n"
            "- **45-55**: Fair match\n"
            "- **Below 40**: Poor match\n"
        )

        chart_data = pd.DataFrame({
            'Dimension': [chart_label for _, _, chart_label in _SCORE_DIMENSIONS],
            'Score': [dimension_scores[key] for key, _, _ in _SCORE_DIMENSIONS],
            'Weight (%)': [weight_pct[key] for key, _, _ in _SCORE_DIMENSIONS],
        })
        return ats_breakdown, ai_analysis, chart_data
    except Exception as e:
        return _score_error(f"Error processing resume: {e}")
# Build the Gradio interface: inputs on top, the analyze button, then the
# two Markdown reports side by side and the per-dimension score table.
# NOTE(review): the emoji in the user-facing strings were mis-decoded in the
# reviewed copy. The title's target emoji is certain; the others are best
# guesses - confirm them and make sure the file is saved as UTF-8.
with gr.Blocks(title="Enhanced ATS Resume Scorer", theme=gr.themes.Soft()) as demo:
    # Blank lines separate the paragraphs so the text after the numbered
    # list is not rendered as a continuation of its last item.
    gr.Markdown(
        "# 🎯 Enhanced ATS Resume Scorer with AI Analysis\n"
        "\n"
        "This tool provides **dual analysis** of your resume:\n"
        "1. **ATS Score** - Technical matching across 8 dimensions\n"
        "2. **AI Analysis** - Intelligent insights and recommendations\n"
        "\n"
        "**📄 Resume Input:** Upload PDF/DOCX file OR paste text manually\n"
        "\n"
        "**📝 Job Description:** Paste as text\n"
    )

    with gr.Row():
        with gr.Column():
            job_desc_input = gr.Textbox(
                label="📝 Job Description",
                placeholder="Paste the complete job description here...",
                lines=12,
                max_lines=20,
            )
        with gr.Column():
            gr.Markdown("### 📄 Resume Input")
            with gr.Tab("Upload File (PDF/DOCX)"):
                # NOTE(review): ".doc" is accepted here, but the tab label and
                # score_resume's prompt mention only PDF/DOCX - confirm that
                # extract_text_from_file can actually read legacy .doc files.
                resume_file_input = gr.File(
                    label="Upload Resume",
                    file_types=[".pdf", ".docx", ".doc"],
                    type="filepath",  # the callback receives a path string
                )
                gr.Markdown("*Supported formats: PDF, DOCX, DOC*")
            with gr.Tab("Paste Text"):
                resume_text_input = gr.Textbox(
                    label="Resume Text",
                    placeholder="Or paste your resume text here...",
                    lines=10,
                    max_lines=15,
                )

    score_btn = gr.Button("🔍 Analyze Resume", variant="primary", size="lg")

    with gr.Row():
        with gr.Column():
            ats_output = gr.Markdown(label="ATS Scoring Results")
        with gr.Column():
            ai_output = gr.Markdown(label="AI Analysis Results")

    with gr.Row():
        chart_output = gr.Dataframe(
            label="Dimension Scores",
            headers=['Dimension', 'Score', 'Weight (%)'],
            datatype=['str', 'number', 'number'],
        )

    # score_resume returns one value per output component, in this order.
    score_btn.click(
        fn=score_resume,
        inputs=[job_desc_input, resume_file_input, resume_text_input],
        outputs=[ats_output, ai_output, chart_output],
    )

if __name__ == "__main__":
    demo.launch()