import gradio as gr import pandas as pd import numpy as np import re from sentence_transformers import SentenceTransformer from sklearn.metrics.pairwise import cosine_similarity import spacy from collections import Counter import json import PyPDF2 import docx import io from pathlib import Path class ATSScorer: def __init__(self): # Load pre-trained models print("Loading models...") self.sentence_model = SentenceTransformer('all-MiniLM-L6-v2') # Try to load spaCy model, fallback if not available try: self.nlp = spacy.load("en_core_web_sm") except OSError: print("spaCy model not found. Install with: python -m spacy download en_core_web_sm") self.nlp = None # Scoring weights from your requirements self.weights = { 'relevant_skills': 0.25, 'work_experience': 0.20, 'education': 0.10, 'certifications': 0.07, 'projects': 0.10, 'keywords_match': 0.10, 'tools_tech': 0.10, 'soft_skills': 0.08 } # Enhanced skill categories with domain-specific grouping self.skill_categories = { 'programming': ['python', 'java', 'javascript', 'c++', 'c#', 'go', 'rust', 'php', 'ruby', 'kotlin', 'swift', 'typescript', 'dart'], 'data_science': ['machine learning', 'deep learning', 'data analysis', 'statistics', 'pandas', 'numpy', 'tensorflow', 'pytorch', 'scikit-learn', 'matplotlib', 'seaborn'], 'web_development': ['html', 'css', 'react', 'vue', 'angular', 'node.js', 'express', 'django', 'flask', 'next.js', 'nuxt.js', 'svelte', 'bootstrap', 'tailwind'], 'mobile_development': ['react native', 'flutter', 'android studio', 'ios', 'swift', 'kotlin', 'xamarin', 'ionic', 'cordova', 'firebase'], 'cybersecurity': ['malware analysis', 'penetration testing', 'vulnerability assessment', 'ida pro', 'ghidra', 'wireshark', 'burp suite', 'metasploit', 'nmap', 'reverse engineering', 'oscp', 'cissp', 'ceh', 'security', 'threat', 'exploit'], 'databases': ['sql', 'mysql', 'postgresql', 'mongodb', 'redis', 'elasticsearch', 'oracle', 'sqlite', 'cassandra', 'dynamodb'], 'cloud': ['aws', 'azure', 'gcp', 'docker', 
'kubernetes', 'terraform', 'jenkins', 'ci/cd', 'devops', 'microservices'], 'ui_ux_design': ['figma', 'sketch', 'adobe xd', 'photoshop', 'illustrator', 'wireframing', 'prototyping', 'user research', 'usability testing', 'interaction design', 'visual design', 'design thinking', 'user journey', 'persona', 'a/b testing'], 'business_analysis': ['business analysis', 'requirements gathering', 'stakeholder management', 'process mapping', 'gap analysis', 'user stories', 'acceptance criteria', 'brd', 'frd', 'visio', 'lucidchart', 'jira', 'confluence', 'agile', 'scrum', 'waterfall'], 'marketing': ['digital marketing', 'content marketing', 'social media marketing', 'seo', 'sem', 'ppc', 'google ads', 'facebook ads', 'email marketing', 'marketing automation', 'analytics', 'google analytics', 'hubspot', 'salesforce', 'brand management', 'campaign management'], 'consultancy': ['strategic planning', 'business strategy', 'change management', 'project management', 'stakeholder engagement', 'process improvement', 'risk assessment', 'financial analysis', 'market research', 'competitive analysis', 'presentation skills', 'client management'], 'ai_ml_engineering': ['artificial intelligence', 'machine learning', 'deep learning', 'neural networks', 'nlp', 'computer vision', 'tensorflow', 'pytorch', 'keras', 'opencv', 'transformers', 'bert', 'gpt', 'llm', 'mlops', 'model deployment', 'feature engineering', 'hyperparameter tuning'], 'soft_skills': ['leadership', 'teamwork', 'communication', 'problem solving', 'project management', 'collaboration', 'analytical', 'creative'] } # Fixed domain indicators with better separation and priority scoring self.domain_indicators = { 'web_development': { 'high_priority': ['web developer', 'frontend developer', 'backend developer', 'full stack developer', 'full-stack developer', 'web development', 'frontend development', 'backend development', 'fullstack'], 'medium_priority': ['web', 'frontend', 'backend', 'full stack', 'website development', 'web 
application development', 'web app', 'spa', 'single page application'], 'low_priority': ['html', 'css', 'javascript', 'react', 'vue', 'angular', 'node.js', 'express', 'django', 'flask', 'responsive design'] }, 'ui_ux_design': { 'high_priority': ['ui designer', 'ux designer', 'ui/ux designer', 'product designer', 'user experience designer', 'user interface designer', 'design lead', 'visual designer'], 'medium_priority': ['ui design', 'ux design', 'user experience', 'user interface', 'interaction design', 'visual design', 'product design'], 'low_priority': ['figma', 'sketch', 'adobe xd', 'wireframing', 'prototyping', 'user research', 'usability testing'] }, 'mobile_development': { 'high_priority': ['mobile developer', 'android developer', 'ios developer', 'mobile app developer', 'app developer'], 'medium_priority': ['mobile', 'android', 'ios', 'app development', 'mobile application', 'cross-platform'], 'low_priority': ['react native', 'flutter', 'swift', 'kotlin', 'xamarin'] }, 'data_science': { 'high_priority': ['data scientist', 'data analyst', 'machine learning engineer', 'data engineer'], 'medium_priority': ['data science', 'machine learning', 'analytics', 'data analysis', 'ai', 'artificial intelligence'], 'low_priority': ['python', 'pandas', 'numpy', 'tensorflow', 'pytorch'] }, 'cybersecurity': { 'high_priority': ['security analyst', 'cybersecurity specialist', 'security engineer', 'penetration tester', 'security researcher'], 'medium_priority': ['security', 'malware', 'vulnerability', 'penetration', 'threat', 'exploit', 'cybersecurity', 'infosec', 'reverse engineering'], 'low_priority': ['wireshark', 'burp suite', 'metasploit', 'nmap'] }, 'devops': { 'high_priority': ['devops engineer', 'site reliability engineer', 'infrastructure engineer', 'cloud engineer'], 'medium_priority': ['devops', 'infrastructure', 'deployment', 'ci/cd', 'automation', 'cloud'], 'low_priority': ['docker', 'kubernetes', 'terraform', 'jenkins'] }, 'game_development': { 'high_priority': 
['game developer', 'game programmer', 'unity developer', 'unreal developer'], 'medium_priority': ['game', 'unity', 'unreal', 'gaming', 'game development', '3d', 'graphics'], 'low_priority': ['c#', 'c++', 'opengl', 'directx'] }, 'business_analysis': { 'high_priority': ['business analyst', 'systems analyst', 'functional analyst', 'requirements analyst'], 'medium_priority': ['business analysis', 'requirements', 'stakeholder', 'process', 'analyst', 'functional requirements', 'business requirements'], 'low_priority': ['jira', 'confluence', 'visio', 'lucidchart'] }, 'marketing': { 'high_priority': ['marketing manager', 'digital marketing specialist', 'marketing analyst', 'content marketer'], 'medium_priority': ['marketing', 'digital marketing', 'content marketing', 'social media', 'seo', 'brand', 'campaign', 'advertising', 'promotion', 'market research'], 'low_priority': ['google ads', 'facebook ads', 'hubspot', 'salesforce'] }, 'consultancy': { 'high_priority': ['consultant', 'management consultant', 'strategy consultant', 'business consultant'], 'medium_priority': ['consulting', 'advisory', 'strategy', 'strategic', 'transformation', 'change management', 'business consulting', 'management consulting'], 'low_priority': ['powerpoint', 'excel', 'presentation'] }, 'ai_ml_engineering': { 'high_priority': ['ai engineer', 'ml engineer', 'machine learning engineer', 'ai specialist', 'nlp engineer'], 'medium_priority': ['artificial intelligence', 'deep learning', 'neural networks', 'nlp engineer', 'computer vision', 'mlops'], 'low_priority': ['tensorflow', 'pytorch', 'keras', 'opencv'] } } self.education_keywords = ['bachelor', 'master', 'phd', 'degree', 'university', 'college', 'education', 'graduated'] self.certification_keywords = ['certified', 'certification', 'certificate', 'licensed', 'accredited'] self.project_keywords = ['project', 'developed', 'built', 'created', 'implemented', 'designed'] # Extended education patterns for undergraduates self.education_patterns = { 
'undergraduate': ['undergraduate', 'pursuing', 'currently enrolled', 'final year', 'third year', 'fourth year', 'sophomore', 'junior', 'senior'], 'year_indicators': ['first year', 'second year', 'third year', 'fourth year', 'final year', 'sophomore', 'junior', 'senior'], 'degree_types': ['bachelor', 'bs', 'ba', 'btech', 'bsc', 'be', 'master', 'ms', 'ma', 'mtech', 'msc', 'phd', 'doctorate', 'mba', 'bba', 'bfa', 'mfa'] } # Soft skills inference from interests and activities self.interest_skill_mapping = { 'creativity': ['art', 'drawing', 'painting', 'design', 'photography', 'music', 'writing', 'creative', 'sketch'], 'leadership': ['captain', 'president', 'head', 'leader', 'coordinator', 'organizer', 'mentor', 'ncc', 'scouts'], 'teamwork': ['team', 'collaboration', 'group projects', 'sports', 'football', 'basketball', 'cricket', 'volleyball'], 'dedication': ['marathon', 'athletics', 'gym', 'fitness', 'ncc', 'volunteer', 'community service', 'consistent'], 'analytical': ['chess', 'puzzle', 'mathematics', 'strategy', 'analysis', 'research', 'debate'], 'communication': ['debate', 'public speaking', 'presentation', 'writing', 'blog', 'theater', 'drama'], 'adaptability': ['travel', 'different cultures', 'international', 'languages', 'diverse'], 'persistence': ['marathon', 'long distance', 'endurance', 'consistent', 'regular', 'discipline'] } # Project category patterns for better classification self.project_categories = { 'web_development': [ 'website', 'web app', 'web application', 'e-commerce', 'blog', 'portfolio', 'dashboard', 'frontend', 'backend', 'full stack', 'responsive', 'landing page', 'cms', 'online store', 'booking system', 'social media', 'chat app', 'forum' ], 'mobile_development': [ 'mobile app', 'android app', 'ios app', 'flutter app', 'react native', 'mobile application', 'app development', 'cross-platform', 'native app', 'hybrid app', 'mobile game' ], 'data_science': [ 'machine learning', 'data analysis', 'prediction model', 'recommendation system', 'data 
visualization', 'analytics', 'ai model', 'neural network', 'classification', 'regression', 'clustering', 'sentiment analysis', 'nlp', 'computer vision' ], 'cybersecurity': [ 'security tool', 'vulnerability scanner', 'penetration testing', 'malware analysis', 'encryption', 'security audit', 'threat detection', 'firewall', 'intrusion detection', 'security framework', 'ethical hacking', 'forensics' ], 'game_development': [ 'game', 'unity', 'unreal', '2d game', '3d game', 'mobile game', 'web game', 'game engine', 'graphics', 'animation', 'gameplay', 'level design' ], 'devops': [ 'ci/cd', 'deployment', 'automation', 'infrastructure', 'monitoring', 'containerization', 'orchestration', 'pipeline', 'cloud deployment', 'server management' ], 'desktop_application': [ 'desktop app', 'gui application', 'desktop software', 'system tool', 'utility', 'desktop game', 'productivity tool', 'file manager', 'text editor' ], 'api_backend': [ 'api', 'rest api', 'backend service', 'microservice', 'web service', 'server', 'database integration', 'authentication system', 'payment gateway' ], 'ui_ux_design': [ 'ui design', 'ux design', 'user interface', 'user experience', 'wireframe', 'prototype', 'mockup', 'design system', 'user research', 'usability testing', 'interaction design', 'visual design', 'app design', 'website design' ], 'business_analysis': [ 'business analysis', 'requirements gathering', 'process mapping', 'workflow design', 'business process', 'system analysis', 'gap analysis', 'stakeholder analysis', 'business requirements', 'functional requirements' ], 'marketing': [ 'marketing campaign', 'digital marketing', 'social media campaign', 'content strategy', 'seo optimization', 'brand campaign', 'market research', 'customer analysis', 'marketing automation', 'email campaign' ], 'ai_ml_engineering': [ 'ai system', 'ml pipeline', 'deep learning model', 'neural network', 'nlp system', 'computer vision', 'recommendation engine', 'chatbot', 'ai application', 'model deployment', 
def extract_text_from_pdf(self, pdf_file):
    """Extract the text of every page of a PDF.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts; elsewhere in this
            class it is called with a filesystem path.

    Returns:
        The text of all pages, each followed by a newline, with leading and
        trailing whitespace stripped.

    Raises:
        Exception: If the file cannot be opened or parsed. The message is
            prefixed with "Error reading PDF: " and the underlying error is
            chained as ``__cause__`` so the real traceback is preserved.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Guard against a page yielding no text object at all, and collect
        # the pieces in a list instead of growing a string with +=.
        page_texts = [(page.extract_text() or "") + "\n" for page in pdf_reader.pages]
    except Exception as e:
        raise Exception(f"Error reading PDF: {str(e)}") from e
    return "".join(page_texts).strip()
def detect_job_domain(self, job_desc):
    """Detect the primary domain of a job description.

    Every domain in ``self.domain_indicators`` is scored by summing a weight
    for each of its indicator terms found in the text: 10 for
    ``high_priority`` terms (job titles), 3 for ``medium_priority`` terms and
    1 for ``low_priority`` terms (tools).

    Terms are matched as whole words/phrases (an optional trailing "s" is
    tolerated, so "web developers" still matches "web developer"). Plain
    substring matching would let short indicators fire inside unrelated
    words, e.g. 'ai' in "maintain" or 'ios' in "scenarios".

    Args:
        job_desc: Raw job description text; ``None`` is treated as empty.

    Returns:
        The name of the highest-scoring domain (the first one in table order
        on a tie), or ``'general'`` when nothing matches or the indicator
        table is empty.
    """
    job_lower = (job_desc or "").lower()

    def mentions(term):
        # Letters/digits on either side mean the term is only part of a
        # longer word, which must not count as a hit.
        pattern = r"(?<![a-z0-9])" + re.escape(term) + r"s?(?![a-z0-9])"
        return re.search(pattern, job_lower) is not None

    tier_weights = (('high_priority', 10), ('medium_priority', 3), ('low_priority', 1))

    domain_scores = {}
    for domain, indicators in self.domain_indicators.items():
        domain_scores[domain] = sum(
            weight
            for tier, weight in tier_weights
            for indicator in indicators[tier]
            if mentions(indicator)
        )

    # max() on an empty mapping would raise, so handle that case explicitly
    if not domain_scores:
        return 'general'

    best_domain = max(domain_scores, key=domain_scores.get)
    return best_domain if domain_scores[best_domain] > 0 else 'general'
def calculate_domain_compatibility(self, job_domain, resume_domain):
    """Return how well a resume's domain fits the job's domain.

    Identical domains score 1.0. Related domains use the affinity table
    below, where the order of the two domains does not matter. Any pair not
    in the table scores 0.5.
    """
    if job_domain == resume_domain:
        return 1.0

    # Symmetric affinity table: each unordered pair of domains appears once
    pair_affinity = {
        frozenset(('cybersecurity', 'web_development')): 0.7,
        frozenset(('cybersecurity', 'mobile_development')): 0.6,
        frozenset(('cybersecurity', 'data_science')): 0.8,
        frozenset(('cybersecurity', 'ai_ml_engineering')): 0.8,
        frozenset(('web_development', 'mobile_development')): 0.9,
        frozenset(('web_development', 'data_science')): 0.8,
        frozenset(('web_development', 'ui_ux_design')): 0.9,
        frozenset(('mobile_development', 'data_science')): 0.7,
        frozenset(('mobile_development', 'ui_ux_design')): 0.8,
        frozenset(('devops', 'web_development')): 0.8,
        frozenset(('devops', 'cybersecurity')): 0.7,
        frozenset(('devops', 'ai_ml_engineering')): 0.8,
        frozenset(('game_development', 'web_development')): 0.7,
        frozenset(('game_development', 'mobile_development')): 0.8,
        frozenset(('ui_ux_design', 'marketing')): 0.7,
        frozenset(('business_analysis', 'consultancy')): 0.9,
        frozenset(('business_analysis', 'marketing')): 0.7,
        frozenset(('business_analysis', 'data_science')): 0.7,
        frozenset(('marketing', 'consultancy')): 0.8,
        frozenset(('ai_ml_engineering', 'data_science')): 0.95,
        frozenset(('ai_ml_engineering', 'web_development')): 0.8,
    }

    return pair_affinity.get(frozenset((job_domain, resume_domain)), 0.5)
def extract_contextual_keywords(self, text, job_domain="general"):
    """Extract skill keywords from text, limited to categories relevant to a domain.

    The text is normalised with ``self.preprocess_text``. Every skill listed
    in ``self.skill_categories`` for the categories tied to ``job_domain`` is
    then looked up in it. Skills are matched as whole words/phrases (an
    optional trailing "s" is tolerated). Plain substring matching would
    report skills that are not there, e.g. 'go' in "google", 'java' in
    "javascript" or 'rust' in "trusted".

    When a spaCy pipeline is loaded (``self.nlp``), the lower-cased text of
    ORG, PRODUCT and LANGUAGE entities is added as well.

    Args:
        text: Job description or resume text.
        job_domain: Domain name that selects the skill categories; unknown
            domains fall back to a generic technical set.

    Returns:
        A list of unique keywords, in no particular order.
    """
    text = self.preprocess_text(text)

    # Skill categories worth scanning for each job domain
    categories_by_domain = {
        'cybersecurity': ['cybersecurity', 'programming'],
        'web_development': ['web_development', 'programming', 'databases'],
        'mobile_development': ['mobile_development', 'programming'],
        'data_science': ['data_science', 'programming', 'databases'],
        'ui_ux_design': ['ui_ux_design', 'web_development'],
        'business_analysis': ['business_analysis', 'databases'],
        'marketing': ['marketing', 'ui_ux_design'],
        'consultancy': ['consultancy', 'business_analysis'],
        'ai_ml_engineering': ['ai_ml_engineering', 'data_science', 'programming'],
    }
    relevant_categories = categories_by_domain.get(
        job_domain, ['programming', 'databases', 'cloud', 'web_development']
    )

    def mentions(skill):
        # Reject hits that are merely part of a longer alphanumeric word
        pattern = r"(?<![a-z0-9])" + re.escape(skill) + r"s?(?![a-z0-9])"
        return re.search(pattern, text) is not None

    keywords = set()
    for category in relevant_categories:
        for skill in self.skill_categories.get(category, ()):
            if mentions(skill):
                keywords.add(skill)

    # Use spaCy for entity extraction if available
    # NOTE(review): entities are taken from the already lower-cased text;
    # confirm that recognition quality is acceptable on such input.
    if self.nlp:
        doc = self.nlp(text)
        for ent in doc.ents:
            if ent.label_ in ['ORG', 'PRODUCT', 'LANGUAGE']:
                keywords.add(ent.text.lower())

    return list(keywords)
def score_relevant_skills(self, job_desc, resume):
    """Score (0-100) how well the resume's skills cover the job's skills.

    Keywords for both texts are extracted relative to the job's domain.
    If the job yields no keywords, the score is semantic similarity * 120,
    capped at 80. Otherwise keyword coverage (60%) and semantic similarity
    (40%) are blended, scaled by 120, multiplied by a domain-fit factor
    between 0.7 and 1.0, and capped at 100.
    """
    target_domain = self.detect_job_domain(job_desc)
    candidate_domain = self.detect_resume_domain(resume)

    wanted = set(self.extract_contextual_keywords(job_desc, target_domain))
    offered = set(self.extract_contextual_keywords(resume, target_domain))

    # Both paths below need the similarity exactly once
    similarity = self.calculate_semantic_similarity(job_desc, resume)

    if len(wanted) == 0:
        # Nothing concrete to match on: rely on meaning alone
        return min(80, similarity * 120)

    coverage = len(wanted & offered) / len(wanted)
    blended = (coverage * 0.6 + similarity * 0.4) * 120

    # A mismatched domain costs at most 30% of the blended score
    fit = self.calculate_domain_compatibility(target_domain, candidate_domain)
    return min(100, blended * (0.7 + 0.3 * fit))
def score_education(self, job_desc, resume):
    """Score (0-100) how well the candidate's education meets the job's needs.

    Steps:
      1. Find the degree types (``self.education_patterns['degree_types']``)
         named in the job description and in the resume.
      2. Award a base score: 85 for an exact degree-type match, 80 for
         bachelor-level equivalents, 90 for a master-level candidate against
         a bachelor-level requirement, 50 for any other degree, 20 for none.
         If the job names no degree, award 60 when the resume mentions any
         education keyword, otherwise 20.
      3. For current undergraduates, scale the base score by their year
         (0.95 final year down to 0.55 first year).
      4. Add up to 20 points of semantic similarity and cap at 100.

    Degree types are matched as whole words. Two-letter codes ('be', 'bs',
    'ba', 'ms', 'ma') are additionally matched in upper case only ("BS",
    "B.S.") whenever the text has any upper-case letters. Substring matching
    would find 'be' in "been", 'ma' in "management" and 'ms' in "systems",
    which makes nearly every text look like it requires or holds a degree.

    Args:
        job_desc: Job description text.
        resume: Resume text.

    Returns:
        A numeric score between 0 and 100.
    """
    resume_lower = resume.lower()
    job_lower = job_desc.lower()
    degree_types = self.education_patterns['degree_types']

    def degrees_in(original_text, lowered_text):
        """Return the degree types mentioned as standalone terms."""
        # Already lower-cased input carries no case signal, so short codes
        # fall back to case-insensitive whole-word matching there.
        case_flag = re.IGNORECASE if original_text == lowered_text else 0
        found = []
        for degree in degree_types:
            if len(degree) <= 2:
                # "BS" or "B.S." but not the verb "be" or the "ms" in "systems"
                letters = r"\.?".join(re.escape(ch) for ch in degree.upper())
                pattern = r"(?<![A-Za-z0-9])" + letters + r"\.?(?![A-Za-z0-9])"
                hit = re.search(pattern, original_text, case_flag)
            else:
                pattern = r"(?<![a-z0-9])" + re.escape(degree) + r"s?(?![a-z0-9])"
                hit = re.search(pattern, lowered_text)
            if hit:
                found.append(degree)
        return found

    # Extract required degree from job description
    required_degrees = degrees_in(job_desc, job_lower)

    # Check if candidate is undergraduate
    # NOTE(review): patterns such as 'senior' or 'junior' also occur in job
    # titles ("senior developer"); confirm whether that is intended.
    is_undergraduate = any(
        pattern in resume_lower for pattern in self.education_patterns['undergraduate']
    )

    # Determine candidate's year if undergraduate
    year_score_multiplier = 1.0
    if is_undergraduate:
        if any(year in resume_lower for year in ['final year', 'fourth year', 'senior']):
            year_score_multiplier = 0.95
        elif any(year in resume_lower for year in ['third year', 'junior']):
            year_score_multiplier = 0.85
        elif any(year in resume_lower for year in ['second year', 'sophomore']):
            year_score_multiplier = 0.70
        elif any(year in resume_lower for year in ['first year', 'freshman']):
            year_score_multiplier = 0.55

    bachelor_level = ('bachelor', 'btech', 'be', 'bs')
    master_level = ('master', 'ms', 'ma', 'mtech', 'mba')

    # Check degree match
    if required_degrees:
        candidate_degrees = degrees_in(resume, resume_lower)
        bachelor_required = any(deg in bachelor_level for deg in required_degrees)
        if not candidate_degrees:
            degree_match_score = 20
        elif any(req_deg in candidate_degrees for req_deg in required_degrees):
            degree_match_score = 85
        elif any(deg in bachelor_level for deg in candidate_degrees) and bachelor_required:
            degree_match_score = 80
        elif any(deg in master_level for deg in candidate_degrees) and bachelor_required:
            degree_match_score = 90
        else:
            degree_match_score = 50
    else:
        education_present = any(keyword in resume_lower for keyword in self.education_keywords)
        degree_match_score = 60 if education_present else 20

    # Apply undergraduate multiplier
    if is_undergraduate and degree_match_score > 0:
        degree_match_score *= year_score_multiplier

    semantic_bonus = self.calculate_semantic_similarity(job_desc, resume) * 20
    return min(100, degree_match_score + semantic_bonus)
def classify_project_category(self, project_text):
    """Pick the project category whose keywords occur most often in the text.

    Each category in ``self.project_categories`` earns one point for every
    keyword of its own that appears (as a substring) in the lower-cased
    description. Returns the top-scoring category, or 'general' when no
    keyword matches at all.
    """
    haystack = project_text.lower()

    winner = 'general'
    winning_hits = 0
    for category, keywords in self.project_categories.items():
        hits = 0
        for keyword in keywords:
            if keyword in haystack:
                hits += 1
        # Strict '>' keeps the earliest category when scores are tied
        if hits > winning_hits:
            winner, winning_hits = category, hits

    return winner
'programming', 'databases'] elif job_domain == 'ui_ux_design': relevant_categories = ['ui_ux_design', 'web_development'] elif job_domain == 'business_analysis': relevant_categories = ['business_analysis', 'databases'] elif job_domain == 'marketing': relevant_categories = ['marketing', 'ui_ux_design'] elif job_domain == 'consultancy': relevant_categories = ['consultancy', 'business_analysis'] elif job_domain == 'ai_ml_engineering': relevant_categories = ['ai_ml_engineering', 'data_science', 'programming'] else: relevant_categories = ['programming', 'databases', 'cloud'] # Extract keywords from relevant categories for category in relevant_categories: if category in self.skill_categories: for skill in self.skill_categories[category]: if skill in project_lower: keywords.add(skill) return keywords def score_projects(self, job_desc, resume): """Score projects with stricter keyword and category matching""" resume_lower = resume.lower() job_lower = job_desc.lower() job_domain = self.detect_job_domain(job_desc) # Extract job keywords for matching job_keywords = set(self.extract_contextual_keywords(job_desc, job_domain)) # Find project sections project_sections = [] lines = resume.split('\n') in_project_section = False current_project = "" for line in lines: line_lower = line.lower().strip() if any(keyword in line_lower for keyword in self.project_keywords): if current_project: project_sections.append(current_project) current_project = line in_project_section = True elif in_project_section: if line.strip() and not line.startswith('-') and not any(section_word in line_lower for section_word in ['experience', 'education', 'skills', 'certification']): current_project += " " + line elif line.strip().startswith('-') or not line.strip(): current_project += " " + line else: if current_project: project_sections.append(current_project) current_project = "" in_project_section = False if current_project: project_sections.append(current_project) # If no projects found, return very low 
score if not project_sections: project_count = sum(1 for keyword in self.project_keywords if keyword in resume_lower) return 5 if project_count > 0 else 0 # Analyze each project total_project_score = 0 project_scores = [] for project in project_sections: project_score = 0 # Step 1: Direct keyword matching (highest priority) project_keywords = self.extract_project_keywords(project, job_domain) if job_keywords: keyword_matches = len(job_keywords.intersection(project_keywords)) keyword_match_ratio = keyword_matches / len(job_keywords) if keyword_match_ratio >= 0.5: # 50% or more keywords match project_score = 80 + (keyword_match_ratio - 0.5) * 40 # 80-100 points elif keyword_match_ratio >= 0.3: # 30-49% keywords match project_score = 60 + (keyword_match_ratio - 0.3) * 100 # 60-80 points elif keyword_match_ratio >= 0.1: # 10-29% keywords match project_score = 30 + (keyword_match_ratio - 0.1) * 150 # 30-60 points elif keyword_matches > 0: # Some keywords match but less than 10% project_score = 20 else: # Step 2: Category matching (if no keyword matches) project_category = self.classify_project_category(project) # Map project categories to job domains category_domain_mapping = { 'web_development': 'web_development', 'mobile_development': 'mobile_development', 'data_science': 'data_science', 'cybersecurity': 'cybersecurity', 'game_development': 'game_development', 'devops': 'devops', 'api_backend': 'web_development', 'desktop_application': 'general', 'ui_ux_design': 'ui_ux_design', 'business_analysis': 'business_analysis', 'marketing': 'marketing', 'ai_ml_engineering': 'ai_ml_engineering' } project_domain = category_domain_mapping.get(project_category, 'general') if project_domain == job_domain: project_score = 40 # Same domain but no keyword matches elif project_domain != 'general' and job_domain != 'general': # Check domain compatibility compatibility = self.calculate_domain_compatibility(job_domain, project_domain) project_score = 20 * compatibility # 0-20 points based 
on compatibility else: project_score = 10 # Very low score for unrelated projects else: # If no job keywords found, use semantic similarity as fallback semantic_score = self.calculate_semantic_similarity(job_desc, project) project_score = semantic_score * 50 # Max 50 points from semantic similarity project_scores.append(project_score) # Calculate final score based on best projects if project_scores: # Take average of all projects but give more weight to best projects project_scores.sort(reverse=True) if len(project_scores) == 1: total_project_score = project_scores[0] elif len(project_scores) == 2: total_project_score = (project_scores[0] * 0.7 + project_scores[1] * 0.3) else: # For 3+ projects, weight the top 3 total_project_score = (project_scores[0] * 0.5 + project_scores[1] * 0.3 + project_scores[2] * 0.2) return min(100, total_project_score) def score_keywords_match(self, job_desc, resume): """Score keyword matching with more generous scoring""" job_domain = self.detect_job_domain(job_desc) job_keywords = self.extract_contextual_keywords(job_desc, job_domain) resume_keywords = self.extract_contextual_keywords(resume, job_domain) if not job_keywords: # More generous fallback using semantic similarity return min(70, self.calculate_semantic_similarity(job_desc, resume) * 140) matches = len(set(job_keywords).intersection(set(resume_keywords))) if matches == 0: # Give more credit for semantic similarity even with no exact matches return min(35, self.calculate_semantic_similarity(job_desc, resume) * 80) # Apply domain compatibility with more generous scoring resume_domain = self.detect_resume_domain(resume) domain_compatibility = self.calculate_domain_compatibility(job_domain, resume_domain) base_score = (matches / len(job_keywords)) * 120 final_score = base_score * (0.7 + 0.3 * domain_compatibility) return min(100, final_score) def score_tools_tech(self, job_desc, resume): """Score tools and technologies with more generous scoring""" job_domain = 
def score_tools_tech(self, job_desc, resume):
    """Score the overlap between technologies the job names and the resume lists.

    Only the skill categories relevant to the detected job domain are
    considered.  A technology counts as mentioned when it appears as a
    stand-alone token (optionally followed by a plural ``s`` or a ``js``
    suffix), so short names such as ``go``, ``java`` or ``rust`` are not
    "found" inside unrelated words like "good", "javascript" or "trust".

    Args:
        job_desc: Raw job description text.
        resume: Raw resume text.

    Returns:
        A number between 0 and 100.  If the job names no known technology
        the result is a semantic-similarity fallback capped at 60; if it
        names some but the resume matches none, the resume gets 8 points per
        relevant technology it does list (capped at 40) or a base of 15.
    """
    job_domain = self.detect_job_domain(job_desc)
    resume_domain = self.detect_resume_domain(resume)

    # Skill categories to scan for each job domain; unknown domains fall
    # back to the generic engineering categories.
    domain_categories = {
        'cybersecurity': ['cybersecurity', 'programming'],
        'web_development': ['web_development', 'programming', 'databases', 'cloud'],
        'mobile_development': ['mobile_development', 'programming'],
        'data_science': ['data_science', 'programming', 'databases'],
        'ui_ux_design': ['ui_ux_design', 'web_development'],
        'business_analysis': ['business_analysis', 'databases'],
        'marketing': ['marketing', 'ui_ux_design'],
        'consultancy': ['consultancy', 'business_analysis'],
        'ai_ml_engineering': ['ai_ml_engineering', 'data_science', 'programming'],
    }
    tech_categories = domain_categories.get(
        job_domain, ['programming', 'databases', 'cloud'])

    # Lower-case once instead of once per technology.
    job_text = job_desc.lower()
    resume_text = resume.lower()

    def mentions(term, text):
        # Left side: not glued to a preceding letter/digit ("trust" != rust).
        # Right side: not continued by a letter ("google" != go), while
        # still accepting "html5", "threats" and "reactjs".
        pattern = r'(?<![a-z0-9])' + re.escape(term) + r'(?:js|s)?(?![a-z])'
        return re.search(pattern, text) is not None

    job_tech = set()
    resume_tech = set()
    for category in tech_categories:
        for tech in self.skill_categories.get(category, ()):
            if mentions(tech, job_text):
                job_tech.add(tech)
            if mentions(tech, resume_text):
                resume_tech.add(tech)

    if not job_tech:
        # More generous fallback using semantic similarity.  A cosine-style
        # similarity may be negative, so the score is floored at zero.
        similarity = self.calculate_semantic_similarity(job_desc, resume)
        return max(0, min(60, similarity * 120))

    matches = len(job_tech & resume_tech)
    if matches == 0:
        # Give some credit for having any relevant tech at all.
        if resume_tech:
            return min(40, len(resume_tech) * 8)
        return 15  # Small base score instead of 0

    # Domain compatibility only modulates 30% of the score.
    domain_compatibility = self.calculate_domain_compatibility(job_domain, resume_domain)
    base_score = (matches / len(job_tech)) * 120
    final_score = base_score * (0.7 + 0.3 * domain_compatibility)
    return min(100, final_score)
def score_soft_skills(self, job_desc, resume):
    """Score soft-skill evidence in the resume against what the job asks for.

    The score is the sum of three capped parts:
      * explicit soft-skill terms (up to 50, or up to 40 when the job names
        none of them),
      * traits inferred from interests/activities via
        ``self.interest_skill_mapping`` (up to 35, or up to 25 when the job
        text implies no trait),
      * a bonus of 3 points per high-value activity (up to 15).

    Returns:
        The combined score, capped at 100.
    """
    job_text = job_desc.lower()
    cv_text = resume.lower()

    # --- Part 1: soft skills that are named outright -----------------------
    catalogue = self.skill_categories['soft_skills']
    wanted = len([term for term in catalogue if term in job_text])
    offered = len([term for term in catalogue if term in cv_text])
    if wanted > 0:
        explicit_points = min(50, (offered / wanted) * 50)
    else:
        explicit_points = min(40, offered * 10)

    # --- Part 2: traits implied by interests and activities ----------------
    evidenced = {
        trait
        for trait, cues in self.interest_skill_mapping.items()
        if any(cue in cv_text for cue in cues)
    }

    # Words in the job text that signal each trait is expected.
    trait_triggers = (
        ('leadership', ('leadership', 'lead', 'manage')),
        ('teamwork', ('team', 'collaboration')),
        ('communication', ('communication', 'present')),
        ('creativity', ('creative', 'innovation', 'design')),
        ('analytical', ('problem', 'analytical', 'analysis')),
        ('dedication', ('dedicated', 'commitment')),
        ('adaptability', ('adapt', 'flexible')),
    )
    required = {
        trait
        for trait, words in trait_triggers
        if any(word in job_text for word in words)
    }

    # NOTE(review): the count-based fallback is attached to the "job implies
    # no trait" case, mirroring part 1 - confirm against the upstream file.
    if not required:
        implied_points = min(25, len(evidenced) * 5)
    else:
        overlap = required & evidenced
        implied_points = (len(overlap) / len(required)) * 35 if overlap else 0

    # --- Part 3: bonus for high-value activities ---------------------------
    standout_activities = ('ncc', 'captain', 'president', 'volunteer',
                           'community service', 'marathon', 'debate')
    hits = len([name for name in standout_activities if name in cv_text])
    bonus_points = min(15, hits * 3)

    return min(100, explicit_points + implied_points + bonus_points)
def calculate_final_score(self, job_description, resume):
    """Run every dimension scorer and combine the results with ``self.weights``.

    Returns:
        A ``(final_score, scores)`` tuple where ``scores`` maps each
        dimension name to its individual score and ``final_score`` is the
        weighted sum of those scores.
    """
    # Dimension name -> scorer, in the order the dimensions are evaluated.
    dimension_scorers = (
        ('relevant_skills', self.score_relevant_skills),
        ('work_experience', self.score_work_experience),
        ('education', self.score_education),
        ('certifications', self.score_certifications),
        ('projects', self.score_projects),
        ('keywords_match', self.score_keywords_match),
        ('tools_tech', self.score_tools_tech),
        ('soft_skills', self.score_soft_skills),
    )

    scores = {}
    for dimension, scorer_fn in dimension_scorers:
        scores[dimension] = scorer_fn(job_description, resume)

    # Weighted total across all dimensions.
    weighted_total = sum(
        value * self.weights[dimension] for dimension, value in scores.items()
    )
    return weighted_total, scores
def score_resume(job_description, resume_file, resume_text):
    """Score a resume against a job description for the Gradio UI.

    Args:
        job_description: Job description text (``None`` is treated as empty).
        resume_file: Uploaded resume file handed to
            ``scorer.extract_text_from_file``, or ``None``.  Takes precedence
            over ``resume_text`` when given.
        resume_text: Pasted resume text (``None`` is treated as empty).

    Returns:
        A ``(markdown, chart)`` tuple.  On success ``markdown`` is the full
        report and ``chart`` is a pandas DataFrame with one row per scoring
        dimension.  On any validation or processing failure ``markdown`` is
        the message to show and ``chart`` is ``None`` - a string there would
        be interpreted by the Dataframe output as a CSV path.
    """
    # Text inputs can arrive as None (e.g. programmatic calls); normalise so
    # the emptiness checks below cannot raise.
    job_description = job_description or ""
    pasted_text = (resume_text or "").strip()

    if not job_description.strip():
        return "Please provide a job description.", None

    # Determine resume source: an uploaded file wins over pasted text.
    if resume_file is not None:
        try:
            resume_content = scorer.extract_text_from_file(resume_file)
            if not resume_content.strip():
                return ("Could not extract text from the uploaded file. "
                        "Please check the file format."), None
        except Exception as e:
            return f"Error processing file: {str(e)}", None
    elif pasted_text:
        resume_content = pasted_text
    else:
        return "Please provide either a resume file (PDF/DOCX) or paste resume text.", None

    try:
        final_score, dimension_scores = scorer.calculate_final_score(
            job_description, resume_content)

        # Detect domains for additional context
        job_domain = scorer.detect_job_domain(job_description)
        resume_domain = scorer.detect_resume_domain(resume_content)
        domain_compatibility = scorer.calculate_domain_compatibility(
            job_domain, resume_domain)

        job_label = job_domain.replace('_', ' ')
        resume_label = resume_domain.replace('_', ' ')

        # Create detailed breakdown (markdown, one entry per output line)
        report_lines = [
            "",
            f"## Overall ATS Score: {final_score:.1f}/100",
            "",
            "### Domain Analysis:",
            f"- **Job Domain**: {job_label.title()}",
            f"- **Resume Domain**: {resume_label.title()}",
            f"- **Domain Compatibility**: {domain_compatibility:.1%}",
            "",
            "### Dimension Breakdown:",
            f"- **Relevant Skills** (25%): {dimension_scores['relevant_skills']:.1f}/100",
            f"- **Work Experience** (20%): {dimension_scores['work_experience']:.1f}/100",
            f"- **Education** (10%): {dimension_scores['education']:.1f}/100",
            f"- **Certifications & Courses** (7%): {dimension_scores['certifications']:.1f}/100",
            f"- **Projects** (10%): {dimension_scores['projects']:.1f}/100",
            f"- **Keywords Match** (10%): {dimension_scores['keywords_match']:.1f}/100",
            f"- **Tools & Technologies** (10%): {dimension_scores['tools_tech']:.1f}/100",
            f"- **Soft Skills Indicators** (8%): {dimension_scores['soft_skills']:.1f}/100",
            "",
            "### Score Interpretation:",
            "- **90-100**: Excellent match",
            "- **76-89**: Very good match",
            "- **56-75**: Good match",
            "- **45-55**: Fair match",
            # Was "Below 40", which left scores of 40-44 without a band.
            "- **Below 45**: Poor match",
            "",
            "### Recommendations:",
        ]

        # Add recommendations based on low scores and domain mismatch
        recommendations = []

        if domain_compatibility < 0.5:
            recommendations.append(
                f"- **Domain Mismatch**: Your resume appears to be focused on {resume_label} "
                f"while the job is in {job_label}. Consider highlighting transferable skills.")

        low_score_advice = (
            ('relevant_skills',
             "- **Skills**: Add more job-specific technical skills to your resume"),
            ('work_experience',
             "- **Experience**: Highlight more relevant work experience or projects"),
            ('keywords_match',
             "- **Keywords**: Include more job-specific keywords throughout your resume"),
            ('tools_tech',
             "- **Technology**: Emphasize technical tools and technologies mentioned in the job description"),
            ('projects',
             "- **Projects**: Add more relevant projects that demonstrate required skills and use job-specific technologies"),
        )
        for dimension, advice in low_score_advice:
            if dimension_scores[dimension] < 70:
                recommendations.append(advice)

        if not recommendations:
            recommendations.append(
                "- **Excellent!** Your resume is well-aligned with the job requirements")

        breakdown = "\n".join(report_lines + recommendations)

        # Create score chart data
        chart_data = pd.DataFrame({
            'Dimension': [
                'Relevant Skills', 'Work Experience', 'Education', 'Certifications',
                'Projects', 'Keywords Match', 'Tools & Tech', 'Soft Skills'
            ],
            'Score': [
                dimension_scores['relevant_skills'],
                dimension_scores['work_experience'],
                dimension_scores['education'],
                dimension_scores['certifications'],
                dimension_scores['projects'],
                dimension_scores['keywords_match'],
                dimension_scores['tools_tech'],
                dimension_scores['soft_skills']
            ],
            'Weight (%)': [25, 20, 10, 7, 10, 10, 10, 8]
        })

        return breakdown, chart_data

    except Exception as e:
        # UI boundary: report the failure in the results pane instead of
        # letting the request fail.
        return f"Error processing resume: {str(e)}", None
# Create Gradio interface
# The layout is built at import time; the server is only started by the
# __main__ guard at the bottom.
with gr.Blocks(title="ATS Resume Scorer", theme=gr.themes.Soft()) as demo:
    # Intro panel: lists the eight scoring dimensions with the same weights
    # that score_resume() prints in its report.
    gr.Markdown("""
# 🎯 ATS Resume Scorer

This tool evaluates how well a resume matches a job description using 8 key dimensions:

- **Relevant Skills** (25%) - Match of skills to job requirements
- **Work Experience** (20%) - Years and relevance of experience
- **Education** (10%) - Degree relevance and performance
- **Certifications & Courses** (7%) - Additional qualifications
- **Projects** (10%) - Quality and relevance of projects
- **Keywords Match** (10%) - Job-specific keyword alignment
- **Tools & Technologies** (10%) - Technical proficiency
- **Soft Skills** (8%) - Leadership, teamwork, communication

**Supported Domains:** Web Development, Mobile Development, Data Science, Cybersecurity, DevOps, Game Development, UI/UX Design, Business Analysis, Marketing, Consultancy, AI/ML Engineering

**📄 Resume Input:** Upload PDF/DOCX file OR paste text manually
**📋 Job Description:** Paste as text
""")

    # Input row: job description on the left, resume (file or text) on the right.
    with gr.Row():
        with gr.Column():
            job_desc_input = gr.Textbox(
                label="📋 Job Description",
                placeholder="Paste the complete job description here...",
                lines=12,
                max_lines=20
            )

        with gr.Column():
            gr.Markdown("### 📄 Resume Input")

            # Two alternative resume sources; score_resume() prefers the
            # uploaded file when both are supplied.
            with gr.Tab("Upload File (PDF/DOCX)"):
                resume_file_input = gr.File(
                    label="Upload Resume",
                    file_types=[".pdf", ".docx", ".doc"],
                    type="filepath"
                )
                gr.Markdown("*Supported formats: PDF, DOCX, DOC*")

            with gr.Tab("Paste Text"):
                resume_text_input = gr.Textbox(
                    label="Resume Text",
                    placeholder="Or paste your resume text here...",
                    lines=10,
                    max_lines=15
                )

    score_btn = gr.Button("📊 Score Resume", variant="primary", size="lg")

    # Output row: markdown report next to the per-dimension score table.
    with gr.Row():
        with gr.Column():
            score_output = gr.Markdown(label="Scoring Results")

        with gr.Column():
            # Column headers match the DataFrame built by score_resume().
            chart_output = gr.Dataframe(
                label="Dimension Scores",
                headers=['Dimension', 'Score', 'Weight (%)'],
                datatype=['str', 'number', 'number']
            )

    # Example inputs
    # Each example is [job description, resume file, resume text], in the
    # same order as the `inputs` list below.
    gr.Examples(
        examples=[
            [
                """Frontend Developer - React.js

We are seeking a Frontend Developer with 2+ years of experience in React.js development.

Requirements:
- Bachelor's degree in Computer Science or related field
- Strong proficiency in JavaScript, HTML, CSS
- Experience with React.js, Redux, and modern frontend frameworks
- Knowledge of responsive design and cross-browser compatibility
- Experience with version control (Git)
- Understanding of RESTful APIs
- Strong problem-solving skills and attention to detail""",
                None,  # No file upload in example
                """John Smith
Frontend Developer

Education:
- Bachelor of Technology in Computer Science, ABC University (2020)

Experience:
- Frontend Developer at Tech Solutions (2021-2024, 3 years)
- Developed responsive web applications using React.js and Redux
- Collaborated with backend developers to integrate RESTful APIs
- Implemented modern CSS frameworks and ensured cross-browser compatibility

Skills:
- Frontend: JavaScript, HTML5, CSS3, React.js, Redux, Vue.js
- Tools: Git, Webpack, npm, VS Code
- Responsive Design, Cross-browser compatibility
- RESTful API integration

Projects:
- E-commerce Website: Built using React.js with Redux for state management
- Portfolio Dashboard: Responsive web application with modern UI/UX"""
            ],
            [
                """UI/UX Designer - Product Design

We are seeking a UI/UX Designer with 2+ years of experience in product design and user research.

Requirements:
- Bachelor's degree in Design, HCI, or related field
- Strong proficiency in Figma, Sketch, and Adobe Creative Suite
- Experience with user research and usability testing
- Knowledge of design systems and prototyping
- Understanding of frontend technologies (HTML, CSS, JavaScript)
- Strong visual design and interaction design skills
- Experience with A/B testing and data-driven design
- Excellent communication and collaboration skills""",
                None,  # No file upload in example
                """Sarah Johnson
UI/UX Designer

Education:
- Bachelor of Fine Arts in Graphic Design, Art Institute (2020)

Experience:
- UI/UX Designer at Design Studio (2021-2024, 3 years)
- Created user interfaces and experiences for web and mobile applications
- Conducted user research and usability testing sessions
- Developed design systems and component libraries using Figma
- Collaborated with frontend developers on implementation

Skills:
- Design Tools: Figma, Sketch, Adobe XD, Photoshop, Illustrator
- Prototyping: InVision, Principle, Framer
- Research: User interviews, A/B testing, Analytics
- Frontend: HTML, CSS, JavaScript basics
- Design: Visual design, Interaction design, Wireframing

Projects:
- E-commerce Mobile App: Designed complete user experience with user research and prototyping
- SaaS Dashboard Redesign: Led design system creation and improved user engagement by 40%

Certifications:
- Google UX Design Certificate
- Figma Advanced Certification"""
            ]
        ],
        inputs=[job_desc_input, resume_file_input, resume_text_input]
    )

    # Wire the button: the three inputs map positionally onto
    # score_resume(job_description, resume_file, resume_text) and its
    # (markdown, chart) return value onto the two outputs.
    score_btn.click(
        fn=score_resume,
        inputs=[job_desc_input, resume_file_input, resume_text_input],
        outputs=[score_output, chart_output]
    )

# Only launch the server when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()