KavyaBansal committed on
Commit
c51354c
·
verified ·
1 Parent(s): 7b6e0ac

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +1187 -0
app.py ADDED
@@ -0,0 +1,1187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ import re
5
+ from sentence_transformers import SentenceTransformer
6
+ from sklearn.metrics.pairwise import cosine_similarity
7
+ import spacy
8
+ from collections import Counter
9
+ import json
10
+ import PyPDF2
11
+ import docx
12
+ import io
13
+ from pathlib import Path
14
+
15
+ class ATSScorer:
16
def __init__(self):
    """Load the NLP models and build the static lookup tables used for scoring.

    Instantiates the ``all-MiniLM-L6-v2`` sentence-transformer and tries to
    load spaCy's ``en_core_web_sm`` pipeline; when spaCy raises ``OSError``
    a hint is printed and ``self.nlp`` is set to ``None``.

    All remaining attributes are plain literals: criterion weights, skill
    vocabularies, tiered domain indicators, education/certification/project
    keyword lists, interest-to-soft-skill hints and project category
    keywords. Every entry is lower-case.
    """
    # Load pre-trained models
    print("Loading models...")
    self.sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

    # Try to load spaCy model, fallback if not available
    try:
        self.nlp = spacy.load("en_core_web_sm")
    except OSError:
        print("spaCy model not found. Install with: python -m spacy download en_core_web_sm")
        self.nlp = None

    # Relative weight of each scoring criterion; the eight values sum to 1.0
    self.weights = {
        'relevant_skills': 0.25,
        'work_experience': 0.20,
        'education': 0.10,
        'certifications': 0.07,
        'projects': 0.10,
        'keywords_match': 0.10,
        'tools_tech': 0.10,
        'soft_skills': 0.08
    }

    # Skill vocabulary grouped by category; keys are referenced by name in
    # the keyword-extraction methods
    self.skill_categories = {
        'programming': ['python', 'java', 'javascript', 'c++', 'c#', 'go', 'rust', 'php', 'ruby', 'kotlin', 'swift', 'typescript', 'dart'],
        'data_science': ['machine learning', 'deep learning', 'data analysis', 'statistics', 'pandas', 'numpy', 'tensorflow', 'pytorch', 'scikit-learn', 'matplotlib', 'seaborn'],
        'web_development': ['html', 'css', 'react', 'vue', 'angular', 'node.js', 'express', 'django', 'flask', 'next.js', 'nuxt.js', 'svelte', 'bootstrap', 'tailwind'],
        'mobile_development': ['react native', 'flutter', 'android studio', 'ios', 'swift', 'kotlin', 'xamarin', 'ionic', 'cordova', 'firebase'],
        'cybersecurity': ['malware analysis', 'penetration testing', 'vulnerability assessment', 'ida pro', 'ghidra', 'wireshark', 'burp suite', 'metasploit', 'nmap', 'reverse engineering', 'oscp', 'cissp', 'ceh', 'security', 'threat', 'exploit'],
        'databases': ['sql', 'mysql', 'postgresql', 'mongodb', 'redis', 'elasticsearch', 'oracle', 'sqlite', 'cassandra', 'dynamodb'],
        'cloud': ['aws', 'azure', 'gcp', 'docker', 'kubernetes', 'terraform', 'jenkins', 'ci/cd', 'devops', 'microservices'],
        'ui_ux_design': ['figma', 'sketch', 'adobe xd', 'photoshop', 'illustrator', 'wireframing', 'prototyping', 'user research', 'usability testing', 'interaction design', 'visual design', 'design thinking', 'user journey', 'persona', 'a/b testing'],
        'business_analysis': ['business analysis', 'requirements gathering', 'stakeholder management', 'process mapping', 'gap analysis', 'user stories', 'acceptance criteria', 'brd', 'frd', 'visio', 'lucidchart', 'jira', 'confluence', 'agile', 'scrum', 'waterfall'],
        'marketing': ['digital marketing', 'content marketing', 'social media marketing', 'seo', 'sem', 'ppc', 'google ads', 'facebook ads', 'email marketing', 'marketing automation', 'analytics', 'google analytics', 'hubspot', 'salesforce', 'brand management', 'campaign management'],
        'consultancy': ['strategic planning', 'business strategy', 'change management', 'project management', 'stakeholder engagement', 'process improvement', 'risk assessment', 'financial analysis', 'market research', 'competitive analysis', 'presentation skills', 'client management'],
        'ai_ml_engineering': ['artificial intelligence', 'machine learning', 'deep learning', 'neural networks', 'nlp', 'computer vision', 'tensorflow', 'pytorch', 'keras', 'opencv', 'transformers', 'bert', 'gpt', 'llm', 'mlops', 'model deployment', 'feature engineering', 'hyperparameter tuning'],
        'soft_skills': ['leadership', 'teamwork', 'communication', 'problem solving', 'project management', 'collaboration', 'analytical', 'creative']
    }

    # Domain indicators in three tiers; the domain detectors weight a hit
    # 10 (high), 3 (medium) or 1 (low)
    # NOTE(review): 'nlp engineer' is listed under both the high and the
    # medium tier of 'ai_ml_engineering', so one mention scores 13 - confirm
    # this is intended
    self.domain_indicators = {
        'web_development': {
            'high_priority': ['web developer', 'frontend developer', 'backend developer', 'full stack developer', 'full-stack developer', 'web development', 'frontend development', 'backend development', 'fullstack'],
            'medium_priority': ['web', 'frontend', 'backend', 'full stack', 'website development', 'web application development', 'web app', 'spa', 'single page application'],
            'low_priority': ['html', 'css', 'javascript', 'react', 'vue', 'angular', 'node.js', 'express', 'django', 'flask', 'responsive design']
        },
        'ui_ux_design': {
            'high_priority': ['ui designer', 'ux designer', 'ui/ux designer', 'product designer', 'user experience designer', 'user interface designer', 'design lead', 'visual designer'],
            'medium_priority': ['ui design', 'ux design', 'user experience', 'user interface', 'interaction design', 'visual design', 'product design'],
            'low_priority': ['figma', 'sketch', 'adobe xd', 'wireframing', 'prototyping', 'user research', 'usability testing']
        },
        'mobile_development': {
            'high_priority': ['mobile developer', 'android developer', 'ios developer', 'mobile app developer', 'app developer'],
            'medium_priority': ['mobile', 'android', 'ios', 'app development', 'mobile application', 'cross-platform'],
            'low_priority': ['react native', 'flutter', 'swift', 'kotlin', 'xamarin']
        },
        'data_science': {
            'high_priority': ['data scientist', 'data analyst', 'machine learning engineer', 'data engineer'],
            'medium_priority': ['data science', 'machine learning', 'analytics', 'data analysis', 'ai', 'artificial intelligence'],
            'low_priority': ['python', 'pandas', 'numpy', 'tensorflow', 'pytorch']
        },
        'cybersecurity': {
            'high_priority': ['security analyst', 'cybersecurity specialist', 'security engineer', 'penetration tester', 'security researcher'],
            'medium_priority': ['security', 'malware', 'vulnerability', 'penetration', 'threat', 'exploit', 'cybersecurity', 'infosec', 'reverse engineering'],
            'low_priority': ['wireshark', 'burp suite', 'metasploit', 'nmap']
        },
        'devops': {
            'high_priority': ['devops engineer', 'site reliability engineer', 'infrastructure engineer', 'cloud engineer'],
            'medium_priority': ['devops', 'infrastructure', 'deployment', 'ci/cd', 'automation', 'cloud'],
            'low_priority': ['docker', 'kubernetes', 'terraform', 'jenkins']
        },
        'game_development': {
            'high_priority': ['game developer', 'game programmer', 'unity developer', 'unreal developer'],
            'medium_priority': ['game', 'unity', 'unreal', 'gaming', 'game development', '3d', 'graphics'],
            'low_priority': ['c#', 'c++', 'opengl', 'directx']
        },
        'business_analysis': {
            'high_priority': ['business analyst', 'systems analyst', 'functional analyst', 'requirements analyst'],
            'medium_priority': ['business analysis', 'requirements', 'stakeholder', 'process', 'analyst', 'functional requirements', 'business requirements'],
            'low_priority': ['jira', 'confluence', 'visio', 'lucidchart']
        },
        'marketing': {
            'high_priority': ['marketing manager', 'digital marketing specialist', 'marketing analyst', 'content marketer'],
            'medium_priority': ['marketing', 'digital marketing', 'content marketing', 'social media', 'seo', 'brand', 'campaign', 'advertising', 'promotion', 'market research'],
            'low_priority': ['google ads', 'facebook ads', 'hubspot', 'salesforce']
        },
        'consultancy': {
            'high_priority': ['consultant', 'management consultant', 'strategy consultant', 'business consultant'],
            'medium_priority': ['consulting', 'advisory', 'strategy', 'strategic', 'transformation', 'change management', 'business consulting', 'management consulting'],
            'low_priority': ['powerpoint', 'excel', 'presentation']
        },
        'ai_ml_engineering': {
            'high_priority': ['ai engineer', 'ml engineer', 'machine learning engineer', 'ai specialist', 'nlp engineer'],
            'medium_priority': ['artificial intelligence', 'deep learning', 'neural networks', 'nlp engineer', 'computer vision', 'mlops'],
            'low_priority': ['tensorflow', 'pytorch', 'keras', 'opencv']
        }
    }

    # Flat keyword lists for the education, certification and project sections
    self.education_keywords = ['bachelor', 'master', 'phd', 'degree', 'university', 'college', 'education', 'graduated']
    self.certification_keywords = ['certified', 'certification', 'certificate', 'licensed', 'accredited']
    self.project_keywords = ['project', 'developed', 'built', 'created', 'implemented', 'designed']

    # Extended education patterns for undergraduates
    self.education_patterns = {
        'undergraduate': ['undergraduate', 'pursuing', 'currently enrolled', 'final year', 'third year', 'fourth year', 'sophomore', 'junior', 'senior'],
        'year_indicators': ['first year', 'second year', 'third year', 'fourth year', 'final year', 'sophomore', 'junior', 'senior'],
        'degree_types': ['bachelor', 'bs', 'ba', 'btech', 'bsc', 'be', 'master', 'ms', 'ma', 'mtech', 'msc', 'phd', 'doctorate', 'mba', 'bba', 'bfa', 'mfa']
    }

    # Soft skills inference from interests and activities
    self.interest_skill_mapping = {
        'creativity': ['art', 'drawing', 'painting', 'design', 'photography', 'music', 'writing', 'creative', 'sketch'],
        'leadership': ['captain', 'president', 'head', 'leader', 'coordinator', 'organizer', 'mentor', 'ncc', 'scouts'],
        'teamwork': ['team', 'collaboration', 'group projects', 'sports', 'football', 'basketball', 'cricket', 'volleyball'],
        'dedication': ['marathon', 'athletics', 'gym', 'fitness', 'ncc', 'volunteer', 'community service', 'consistent'],
        'analytical': ['chess', 'puzzle', 'mathematics', 'strategy', 'analysis', 'research', 'debate'],
        'communication': ['debate', 'public speaking', 'presentation', 'writing', 'blog', 'theater', 'drama'],
        'adaptability': ['travel', 'different cultures', 'international', 'languages', 'diverse'],
        'persistence': ['marathon', 'long distance', 'endurance', 'consistent', 'regular', 'discipline']
    }

    # Project category patterns for better classification
    self.project_categories = {
        'web_development': [
            'website', 'web app', 'web application', 'e-commerce', 'blog', 'portfolio', 'dashboard',
            'frontend', 'backend', 'full stack', 'responsive', 'landing page', 'cms',
            'online store', 'booking system', 'social media', 'chat app', 'forum'
        ],
        'mobile_development': [
            'mobile app', 'android app', 'ios app', 'flutter app', 'react native', 'mobile application',
            'app development', 'cross-platform', 'native app', 'hybrid app', 'mobile game'
        ],
        'data_science': [
            'machine learning', 'data analysis', 'prediction model', 'recommendation system',
            'data visualization', 'analytics', 'ai model', 'neural network', 'classification',
            'regression', 'clustering', 'sentiment analysis', 'nlp', 'computer vision'
        ],
        'cybersecurity': [
            'security tool', 'vulnerability scanner', 'penetration testing', 'malware analysis',
            'encryption', 'security audit', 'threat detection', 'firewall', 'intrusion detection',
            'security framework', 'ethical hacking', 'forensics'
        ],
        'game_development': [
            'game', 'unity', 'unreal', '2d game', '3d game', 'mobile game', 'web game',
            'game engine', 'graphics', 'animation', 'gameplay', 'level design'
        ],
        'devops': [
            'ci/cd', 'deployment', 'automation', 'infrastructure', 'monitoring', 'containerization',
            'orchestration', 'pipeline', 'cloud deployment', 'server management'
        ],
        'desktop_application': [
            'desktop app', 'gui application', 'desktop software', 'system tool', 'utility',
            'desktop game', 'productivity tool', 'file manager', 'text editor'
        ],
        'api_backend': [
            'api', 'rest api', 'backend service', 'microservice', 'web service', 'server',
            'database integration', 'authentication system', 'payment gateway'
        ],
        'ui_ux_design': [
            'ui design', 'ux design', 'user interface', 'user experience', 'wireframe', 'prototype',
            'mockup', 'design system', 'user research', 'usability testing', 'interaction design',
            'visual design', 'app design', 'website design'
        ],
        'business_analysis': [
            'business analysis', 'requirements gathering', 'process mapping', 'workflow design',
            'business process', 'system analysis', 'gap analysis', 'stakeholder analysis',
            'business requirements', 'functional requirements'
        ],
        'marketing': [
            'marketing campaign', 'digital marketing', 'social media campaign', 'content strategy',
            'seo optimization', 'brand campaign', 'market research', 'customer analysis',
            'marketing automation', 'email campaign'
        ],
        'ai_ml_engineering': [
            'ai system', 'ml pipeline', 'deep learning model', 'neural network', 'nlp system',
            'computer vision', 'recommendation engine', 'chatbot', 'ai application',
            'model deployment', 'mlops', 'feature engineering'
        ]
    }
197
+
198
def extract_text_from_pdf(self, pdf_file):
    """Return the text of every page of a PDF, one page per line block.

    Args:
        pdf_file: Anything ``PyPDF2.PdfReader`` accepts.

    Returns:
        The page texts joined with newlines, stripped of surrounding
        whitespace.

    Raises:
        Exception: Any reader failure, re-raised with an
            ``"Error reading PDF: "`` prefix.
    """
    try:
        reader = PyPDF2.PdfReader(pdf_file)
        combined = "".join(page.extract_text() + "\n" for page in reader.pages)
        return combined.strip()
    except Exception as e:
        raise Exception(f"Error reading PDF: {str(e)}")
208
+
209
def extract_text_from_docx(self, docx_file):
    """Return the text of every paragraph of a DOCX document.

    Args:
        docx_file: Anything ``docx.Document`` accepts.

    Returns:
        The paragraph texts joined with newlines, stripped of surrounding
        whitespace.

    Raises:
        Exception: Any reader failure, re-raised with an
            ``"Error reading DOCX: "`` prefix.
    """
    try:
        document = docx.Document(docx_file)
        combined = "".join(paragraph.text + "\n" for paragraph in document.paragraphs)
        return combined.strip()
    except Exception as e:
        raise Exception(f"Error reading DOCX: {str(e)}")
219
+
220
def extract_text_from_file(self, file):
    """Extract text from an uploaded resume file (PDF or DOCX).

    Args:
        file: ``None``, a filesystem path given as ``str`` or
            ``pathlib.Path``, or an upload object exposing the path through
            its ``name`` attribute.

    Returns:
        The extracted text, or ``""`` when ``file`` is ``None``.

    Raises:
        Exception: For unsupported extensions and for any extraction
            failure; the message is prefixed with
            ``"Error processing file: "``.
    """
    if file is None:
        return ""

    # Plain paths must be handled before the ``.name`` lookup because
    # ``Path.name`` is only the final path component, not the full path.
    if isinstance(file, (str, Path)):
        source = str(file)
    else:
        source = file.name

    file_extension = Path(source).suffix.lower()

    try:
        if file_extension == '.pdf':
            return self.extract_text_from_pdf(source)
        elif file_extension in ['.docx', '.doc']:
            # NOTE(review): '.doc' is routed to the DOCX reader; confirm the
            # reader can actually open legacy Word files.
            return self.extract_text_from_docx(source)
        else:
            raise Exception(f"Unsupported file format: {file_extension}. Please upload PDF or DOCX files.")
    except Exception as e:
        # Keep the original cause attached for debugging.
        raise Exception(f"Error processing file: {str(e)}") from e
237
+
238
def preprocess_text(self, text):
    """Normalise text for matching.

    Lower-cases the input, trims it and collapses every whitespace run to a
    single space. Falsy input (``None`` or ``""``) yields ``""``.
    """
    if text:
        return re.sub(r'\s+', ' ', text.lower().strip())
    return ""
246
+
247
def detect_job_domain(self, job_desc):
    """Detect the primary domain of a job description.

    Each domain in ``self.domain_indicators`` is scored by summing 10 for
    every high-priority indicator found, 3 for every medium-priority one
    and 1 for every low-priority one.

    Indicators must appear as standalone tokens (optionally pluralised with
    ``s``/``es``). Plain substring tests produced false hits such as 'ai'
    inside "maintain", 'spa' inside "space" or 'ios' inside "scenarios".

    Args:
        job_desc: Job description text; ``None`` is treated as empty.

    Returns:
        The best-scoring domain name (the first listed wins ties), or
        ``'general'`` when nothing matches.
    """
    job_lower = (job_desc or "").lower()
    tier_weights = (('high_priority', 10), ('medium_priority', 3), ('low_priority', 1))

    def mentions(indicator):
        # Look-arounds instead of \b so that terms ending in punctuation
        # ('c++', 'c#', 'ci/cd') still match.
        pattern = r'(?<![a-z0-9])' + re.escape(indicator) + r'(?:s|es)?(?![a-z0-9])'
        return re.search(pattern, job_lower) is not None

    domain_scores = {}
    for domain, indicators in self.domain_indicators.items():
        domain_scores[domain] = sum(
            weight
            for tier, weight in tier_weights
            for indicator in indicators[tier]
            if mentions(indicator)
        )

    # default=0 keeps an empty indicator table from raising ValueError
    if max(domain_scores.values(), default=0) > 0:
        return max(domain_scores, key=domain_scores.get)
    return 'general'
278
+
279
def detect_resume_domain(self, resume):
    """Detect the primary domain of a resume.

    Each domain in ``self.domain_indicators`` is scored by summing 10 for
    every high-priority indicator found, 3 for every medium-priority one
    and 1 for every low-priority one.

    Indicators must appear as standalone tokens (optionally pluralised with
    ``s``/``es``). Plain substring tests produced false hits such as 'ai'
    inside "maintain", 'spa' inside "space" or 'ios' inside "scenarios".

    Args:
        resume: Resume text; ``None`` is treated as empty.

    Returns:
        The best-scoring domain name (the first listed wins ties), or
        ``'general'`` when nothing matches.
    """
    resume_lower = (resume or "").lower()
    tier_weights = (('high_priority', 10), ('medium_priority', 3), ('low_priority', 1))

    def mentions(indicator):
        # Look-arounds instead of \b so that terms ending in punctuation
        # ('c++', 'c#', 'ci/cd') still match.
        pattern = r'(?<![a-z0-9])' + re.escape(indicator) + r'(?:s|es)?(?![a-z0-9])'
        return re.search(pattern, resume_lower) is not None

    domain_scores = {}
    for domain, indicators in self.domain_indicators.items():
        domain_scores[domain] = sum(
            weight
            for tier, weight in tier_weights
            for indicator in indicators[tier]
            if mentions(indicator)
        )

    # default=0 keeps an empty indicator table from raising ValueError
    if max(domain_scores.values(), default=0) > 0:
        return max(domain_scores, key=domain_scores.get)
    return 'general'
310
+
311
def calculate_domain_compatibility(self, job_domain, resume_domain):
    """Return how transferable a resume's domain is to the job's domain.

    Identical domains score 1.0. Other pairs are looked up in a fixed table,
    first as (job, resume) and then reversed; pairs absent in both
    directions score 0.5.
    """
    if job_domain == resume_domain:
        return 1.0

    # More generous domain compatibility matrix
    pair_scores = {
        ('cybersecurity', 'web_development'): 0.7,
        ('cybersecurity', 'mobile_development'): 0.6,
        ('cybersecurity', 'data_science'): 0.8,
        ('cybersecurity', 'ai_ml_engineering'): 0.8,
        ('web_development', 'mobile_development'): 0.9,
        ('web_development', 'data_science'): 0.8,
        ('web_development', 'ui_ux_design'): 0.9,
        ('mobile_development', 'data_science'): 0.7,
        ('mobile_development', 'ui_ux_design'): 0.8,
        ('devops', 'web_development'): 0.8,
        ('devops', 'cybersecurity'): 0.7,
        ('devops', 'ai_ml_engineering'): 0.8,
        ('game_development', 'web_development'): 0.7,
        ('game_development', 'mobile_development'): 0.8,
        ('ui_ux_design', 'web_development'): 0.9,
        ('ui_ux_design', 'mobile_development'): 0.8,
        ('ui_ux_design', 'marketing'): 0.7,
        ('business_analysis', 'consultancy'): 0.9,
        ('business_analysis', 'marketing'): 0.7,
        ('business_analysis', 'data_science'): 0.7,
        ('marketing', 'consultancy'): 0.8,
        ('marketing', 'business_analysis'): 0.7,
        ('marketing', 'ui_ux_design'): 0.7,
        ('consultancy', 'business_analysis'): 0.9,
        ('consultancy', 'marketing'): 0.8,
        ('ai_ml_engineering', 'data_science'): 0.95,
        ('ai_ml_engineering', 'web_development'): 0.8,
        ('ai_ml_engineering', 'cybersecurity'): 0.8,
        ('data_science', 'ai_ml_engineering'): 0.95,
    }

    # Try the pair as given, then mirrored
    for pair in ((job_domain, resume_domain), (resume_domain, job_domain)):
        if pair in pair_scores:
            return pair_scores[pair]
    return 0.5
354
+
355
def extract_years_of_experience(self, text):
    """Return the largest "N years"-style experience figure found in text.

    Four phrasings are recognised on the lower-cased text ("N years of
    experience", "N yrs experience", "experience ... N years", "N years
    in ..."). Returns 0 when none of them occurs.
    """
    lowered = text.lower()
    year_patterns = (
        r'(\d+)\+?\s*years?\s+(?:of\s+)?experience',
        r'(\d+)\+?\s*yrs?\s+(?:of\s+)?experience',
        r'experience.*?(\d+)\+?\s*years?',
        r'(\d+)\+?\s*years?\s+in\s+',
    )

    found = [
        int(hit)
        for pattern in year_patterns
        for hit in re.findall(pattern, lowered)
    ]
    return max(found, default=0)
371
+
372
def extract_contextual_keywords(self, text, job_domain="general"):
    """Extract the skill keywords relevant to ``job_domain`` from ``text``.

    The text is normalised with ``self.preprocess_text`` and scanned for
    every skill in the categories mapped to the domain. Unknown domains use
    programming, databases, cloud and web development.

    Skills must appear as standalone tokens. Plain substring tests reported
    'go' for "good"/"google", 'java' for "javascript" and 'sql' for "mysql".
    Run-together variants such as "reactjs" no longer count as 'react'.

    When a spaCy pipeline is loaded, ORG / PRODUCT / LANGUAGE entities are
    added as well (lower-cased).

    Args:
        text: Free text of a job description or resume.
        job_domain: Domain name that selects the skill categories.

    Returns:
        A list of distinct keywords in arbitrary order.
    """
    text = self.preprocess_text(text)

    domain_categories = {
        'cybersecurity': ['cybersecurity', 'programming'],
        'web_development': ['web_development', 'programming', 'databases'],
        'mobile_development': ['mobile_development', 'programming'],
        'data_science': ['data_science', 'programming', 'databases'],
        'ui_ux_design': ['ui_ux_design', 'web_development'],
        'business_analysis': ['business_analysis', 'databases'],
        'marketing': ['marketing', 'ui_ux_design'],
        'consultancy': ['consultancy', 'business_analysis'],
        'ai_ml_engineering': ['ai_ml_engineering', 'data_science', 'programming'],
    }
    relevant_categories = domain_categories.get(
        job_domain, ['programming', 'databases', 'cloud', 'web_development']
    )

    def mentions(skill):
        # Look-arounds instead of \b so 'c++', 'c#' and 'ci/cd' still match.
        pattern = r'(?<![a-z0-9])' + re.escape(skill) + r'(?![a-z0-9])'
        return re.search(pattern, text) is not None

    keywords = {
        skill
        for category in relevant_categories
        for skill in self.skill_categories.get(category, ())
        if mentions(skill)
    }

    # Use spaCy for entity extraction if available
    if self.nlp:
        doc = self.nlp(text)
        for ent in doc.ents:
            if ent.label_ in ['ORG', 'PRODUCT', 'LANGUAGE']:
                keywords.add(ent.text.lower())

    return list(keywords)
415
+
416
def calculate_semantic_similarity(self, text1, text2):
    """Return the embedding cosine similarity of two texts.

    Both texts are encoded with ``self.sentence_model``. Similarities below
    0.15 are reported as 0.0, and so is any comparison with an empty text.
    """
    if not (text1 and text2):
        return 0.0

    vectors = self.sentence_model.encode([text1, text2])
    similarity = cosine_similarity([vectors[0]], [vectors[1]])[0][0]

    # Lower threshold for more inclusive matching
    return 0.0 if similarity < 0.15 else max(0, similarity)
429
+
430
def score_relevant_skills(self, job_desc, resume):
    """Score how well the resume's skills cover the job's skills (0-100).

    Without any job keywords the score is the semantic similarity times
    120, capped at 80. Otherwise keyword overlap (60%) and semantic
    similarity (40%) are blended, scaled by 120, damped by domain
    compatibility (never below 70% of the blend) and capped at 100.
    """
    job_domain = self.detect_job_domain(job_desc)
    resume_domain = self.detect_resume_domain(resume)

    # Both texts are scanned with the job's domain vocabulary
    wanted = set(self.extract_contextual_keywords(job_desc, job_domain))
    offered = set(self.extract_contextual_keywords(resume, job_domain))

    similarity = self.calculate_semantic_similarity(job_desc, resume)

    if not wanted:
        # More generous fallback using semantic similarity
        return min(80, similarity * 120)

    overlap = len(wanted & offered) / len(wanted)
    blended = (overlap * 0.6 + similarity * 0.4) * 120

    # Apply domain compatibility with minimal penalty
    compatibility = self.calculate_domain_compatibility(job_domain, resume_domain)
    return min(100, blended * (0.7 + 0.3 * compatibility))
458
+
459
def score_work_experience(self, job_desc, resume):
    """Score work experience against the job's requirement (0-100).

    The years component is the resume/job year ratio times 120 (capped at
    100). When the job states no years it is 60 if the resume states any,
    else 20. It is blended 40/60 with semantic similarity times 120, damped
    by domain compatibility (never below 70%) and capped at 100.
    """
    candidate_years = self.extract_years_of_experience(resume)
    required_years = self.extract_years_of_experience(job_desc)

    job_domain = self.detect_job_domain(job_desc)
    resume_domain = self.detect_resume_domain(resume)

    if required_years > 0:
        years_component = min(100, (candidate_years / required_years) * 120)
    elif candidate_years > 0:
        years_component = 60
    else:
        years_component = 20

    similarity_component = self.calculate_semantic_similarity(job_desc, resume) * 120
    compatibility = self.calculate_domain_compatibility(job_domain, resume_domain)

    blended = (years_component * 0.4 + similarity_component * 0.6)
    return min(100, blended * (0.7 + 0.3 * compatibility))
484
+
485
def score_education(self, job_desc, resume):
    """Score education relevance (0-100), with handling for undergraduates.

    Degree types must appear as standalone tokens. Two-letter abbreviations
    ('be', 'bs', 'ba', 'ms', 'ma') are additionally accepted only in
    upper-case, optionally dotted ("B.S."). As lower-case substrings they
    matched almost any text ("member", "database", "systems"), so nearly
    every job "required" and every resume "held" a degree.

    Scoring:
      * the job names degree types: 85 for a direct match, 80 for
        bachelor-level equivalents, 90 for a master's against a bachelor
        requirement, 50 for any other degree, 20 for none;
      * the job names none: 60 if the resume has education keywords, else 20.

    For resumes matching an undergraduate pattern the score is multiplied
    by 0.55-0.95 depending on the stated year. Finally up to 20 points of
    semantic similarity are added and the result is capped at 100.

    Args:
        job_desc: Job description text in its original casing.
        resume: Resume text in its original casing.

    Returns:
        The education score as a number between 0 and 100.
    """
    resume_lower = resume.lower()

    def degrees_mentioned(raw_text):
        """Return the configured degree types present in ``raw_text``."""
        lowered = raw_text.lower()
        found = []
        for degree in self.education_patterns['degree_types']:
            if len(degree) <= 2:
                # Case-sensitive: the lower-case forms are ordinary words
                letters = r'\.?'.join(re.escape(ch) for ch in degree.upper())
                pattern = r'(?<![A-Za-z0-9])' + letters + r'\.?(?![A-Za-z0-9])'
                hit = re.search(pattern, raw_text)
            else:
                # Allow "bachelors" / "master's"
                pattern = r'(?<![a-z0-9])' + re.escape(degree) + r"(?:'?s)?(?![a-z0-9])"
                hit = re.search(pattern, lowered)
            if hit:
                found.append(degree)
        return found

    # Extract required degree from job description
    required_degrees = degrees_mentioned(job_desc)

    # Check if candidate is undergraduate
    is_undergraduate = any(pattern in resume_lower for pattern in self.education_patterns['undergraduate'])

    # Determine candidate's year if undergraduate
    year_score_multiplier = 1.0
    if is_undergraduate:
        if any(year in resume_lower for year in ['final year', 'fourth year', 'senior']):
            year_score_multiplier = 0.95
        elif any(year in resume_lower for year in ['third year', 'junior']):
            year_score_multiplier = 0.85
        elif any(year in resume_lower for year in ['second year', 'sophomore']):
            year_score_multiplier = 0.70
        elif any(year in resume_lower for year in ['first year', 'freshman']):
            year_score_multiplier = 0.55

    bachelor_level = ['bachelor', 'btech', 'be', 'bs']
    master_level = ['master', 'ms', 'ma', 'mtech', 'mba']

    # Check degree match with more generous scoring
    if required_degrees:
        candidate_degrees = degrees_mentioned(resume)
        bachelor_required = any(deg in bachelor_level for deg in required_degrees)

        if not candidate_degrees:
            degree_match_score = 20
        elif any(req_deg in candidate_degrees for req_deg in required_degrees):
            degree_match_score = 85
        elif any(deg in bachelor_level for deg in candidate_degrees) and bachelor_required:
            degree_match_score = 80
        elif any(deg in master_level for deg in candidate_degrees) and bachelor_required:
            degree_match_score = 90
        else:
            degree_match_score = 50
    else:
        education_present = any(keyword in resume_lower for keyword in self.education_keywords)
        degree_match_score = 60 if education_present else 20

    # Apply undergraduate multiplier
    if is_undergraduate and degree_match_score > 0:
        degree_match_score *= year_score_multiplier

    # Higher semantic similarity bonus
    semantic_bonus = self.calculate_semantic_similarity(job_desc, resume) * 20

    return min(100, degree_match_score + semantic_bonus)
545
+
546
def score_certifications(self, job_desc, resume):
    """Score certifications and courses (7% weight), 0-100.

    Returns 0 when the resume contains none of
    ``self.certification_keywords``. Otherwise the score is the sum of:
      * 25 points per distinct certification keyword, capped at 60;
      * semantic similarity between job and resume times 30;
      * a per-certificate bonus for certificates known for the job's domain
        (20 each for cybersecurity, 15 each elsewhere);
    capped at 100.

    Certificate names must appear as standalone tokens. As substrings,
    'sans' was credited for "artisans".

    Args:
        job_desc: Job description text.
        resume: Resume text.

    Returns:
        The certification score as a number between 0 and 100.
    """
    resume_lower = resume.lower()

    # Check for certification keywords
    cert_count = sum(1 for keyword in self.certification_keywords if keyword in resume_lower)

    # Return 0 if no certifications found
    if cert_count == 0:
        return 0

    # (bonus per certificate, recognised certificates) for each domain
    domain_certs = {
        'cybersecurity': (20, ['oscp', 'cissp', 'ceh', 'giac', 'sans', 'security+']),
        'web_development': (15, ['aws certified', 'google cloud', 'azure certified', 'mongodb certified']),
        'data_science': (15, ['tensorflow developer', 'aws machine learning', 'google data engineer', 'microsoft azure ai']),
        'ui_ux_design': (15, ['adobe certified', 'figma certified', 'ux certification', 'design thinking', 'google ux']),
        'business_analysis': (15, ['cbap', 'ccba', 'pmp', 'agile certified', 'scrum master', 'business analysis']),
        'marketing': (15, ['google ads', 'facebook blueprint', 'hubspot', 'google analytics', 'digital marketing']),
        'consultancy': (15, ['pmp', 'prince2', 'change management', 'lean six sigma', 'agile certified']),
        'ai_ml_engineering': (15, ['tensorflow developer', 'aws machine learning', 'google cloud ml', 'nvidia deep learning', 'microsoft ai']),
    }
    bonus_each, known_certs = domain_certs.get(self.detect_job_domain(job_desc), (0, ()))

    def holds(cert):
        # Look-arounds instead of \b so 'security+' still matches.
        pattern = r'(?<![a-z0-9])' + re.escape(cert) + r'(?![a-z0-9])'
        return re.search(pattern, resume_lower) is not None

    domain_cert_bonus = sum(bonus_each for cert in known_certs if holds(cert))

    # More generous base score for having certifications
    base_score = min(60, cert_count * 25)

    # Relevance to job description
    relevance_score = self.calculate_semantic_similarity(job_desc, resume) * 30

    return min(100, base_score + relevance_score + domain_cert_bonus)
594
+
595
def classify_project_category(self, project_text):
    """Return the project category whose keywords occur most in the text.

    Each category in ``self.project_categories`` is scored by how many of
    its keywords appear as substrings of the lower-cased description. The
    first category with the highest score wins; ``'general'`` is returned
    when no keyword occurs.
    """
    lowered = project_text.lower()

    best_category, best_hits = 'general', 0
    for category, keywords in self.project_categories.items():
        hits = sum(keyword in lowered for keyword in keywords)
        # Strict comparison keeps the earliest category on ties
        if hits > best_hits:
            best_category, best_hits = category, hits

    return best_category
609
+
610
def extract_project_keywords(self, project_text, job_domain):
    """Collect the skills mentioned in a project that matter for a job domain.

    The job domain selects which groups of ``self.skill_categories`` are
    considered; unknown domains fall back to programming, databases and
    cloud. A skill counts as mentioned when it occurs as a substring of the
    lower-cased project text.

    Returns:
        A set of the matching skill strings.
    """
    categories_by_domain = {
        'cybersecurity': ['cybersecurity', 'programming'],
        'web_development': ['web_development', 'programming', 'databases'],
        'mobile_development': ['mobile_development', 'programming'],
        'data_science': ['data_science', 'programming', 'databases'],
        'ui_ux_design': ['ui_ux_design', 'web_development'],
        'business_analysis': ['business_analysis', 'databases'],
        'marketing': ['marketing', 'ui_ux_design'],
        'consultancy': ['consultancy', 'business_analysis'],
        'ai_ml_engineering': ['ai_ml_engineering', 'data_science', 'programming'],
    }
    fallback_categories = ['programming', 'databases', 'cloud']

    text = project_text.lower()
    found = set()
    for name in categories_by_domain.get(job_domain, fallback_categories):
        # Categories missing from the skill table are silently skipped.
        for skill in self.skill_categories.get(name, ()):
            if skill in text:
                found.add(skill)
    return found
646
+
647
def score_projects(self, job_desc, resume):
    """Score how well the resume's projects match the job description.

    The resume is split into project blocks: any line containing one of
    ``self.project_keywords`` opens a new block, and a block runs until a
    non-bullet, non-blank line that mentions another resume section. Each
    block is then scored:

    * by the share of job keywords it covers (20-100 points), or
    * when it covers none, by comparing its category with the job domain
      (at most 40 points), or
    * when the job description yields no keywords at all, by semantic
      similarity (at most 50 points).

    The final value blends the best three block scores, best first.

    Args:
        job_desc: Job description text.
        resume: Resume text, one entry per newline-separated line.

    Returns:
        The blended project score, capped at 100. ``0`` is returned when no
        project block is found and no project keyword occurs anywhere.
    """
    # Section names that end a project block when they appear on a line
    # that is neither blank nor a bullet.
    section_words = ('experience', 'education', 'skills', 'certification')

    # Project category -> job domain. Built once instead of per project.
    category_domain_mapping = {
        'web_development': 'web_development',
        'mobile_development': 'mobile_development',
        'data_science': 'data_science',
        'cybersecurity': 'cybersecurity',
        'game_development': 'game_development',
        'devops': 'devops',
        'api_backend': 'web_development',
        'desktop_application': 'general',
        'ui_ux_design': 'ui_ux_design',
        'business_analysis': 'business_analysis',
        'marketing': 'marketing',
        'ai_ml_engineering': 'ai_ml_engineering'
    }

    job_domain = self.detect_job_domain(job_desc)

    # Extract job keywords for matching
    job_keywords = set(self.extract_contextual_keywords(job_desc, job_domain))

    # Find project sections
    project_sections = []
    current_project = ""
    in_project_section = False

    for line in resume.split('\n'):
        stripped = line.strip()
        line_lower = line.lower().strip()

        if any(keyword in line_lower for keyword in self.project_keywords):
            # A project keyword always starts a new block.
            if current_project:
                project_sections.append(current_project)
            current_project = line
            in_project_section = True
        elif in_project_section:
            ends_block = (
                bool(stripped)
                and not stripped.startswith('-')
                and any(word in line_lower for word in section_words)
            )
            if ends_block:
                if current_project:
                    project_sections.append(current_project)
                current_project = ""
                in_project_section = False
            else:
                # Blank lines, bullets and ordinary text extend the block.
                current_project += " " + line

    if current_project:
        project_sections.append(current_project)

    # If no projects found, return very low score
    if not project_sections:
        resume_lower = resume.lower()
        mentions_projects = any(
            keyword in resume_lower for keyword in self.project_keywords
        )
        return 5 if mentions_projects else 0

    # Analyze each project
    project_scores = []
    for project in project_sections:
        project_score = 0

        if job_keywords:
            # Step 1: Direct keyword matching (highest priority)
            project_keywords = self.extract_project_keywords(project, job_domain)
            keyword_matches = len(job_keywords.intersection(project_keywords))
            keyword_match_ratio = keyword_matches / len(job_keywords)

            if keyword_match_ratio >= 0.5:  # 50% or more keywords match
                project_score = 80 + (keyword_match_ratio - 0.5) * 40  # 80-100 points
            elif keyword_match_ratio >= 0.3:  # 30-49% keywords match
                project_score = 60 + (keyword_match_ratio - 0.3) * 100  # 60-80 points
            elif keyword_match_ratio >= 0.1:  # 10-29% keywords match
                project_score = 30 + (keyword_match_ratio - 0.1) * 150  # 30-60 points
            elif keyword_matches > 0:  # Some keywords match but less than 10%
                project_score = 20
            else:
                # Step 2: Category matching (if no keyword matches)
                project_category = self.classify_project_category(project)
                project_domain = category_domain_mapping.get(project_category, 'general')

                if project_domain == job_domain:
                    project_score = 40  # Same domain but no keyword matches
                elif project_domain != 'general' and job_domain != 'general':
                    # Scale by how compatible the two domains are
                    compatibility = self.calculate_domain_compatibility(job_domain, project_domain)
                    project_score = 20 * compatibility
                else:
                    project_score = 10  # Very low score for unrelated projects
        else:
            # If no job keywords found, use semantic similarity as fallback
            semantic_score = self.calculate_semantic_similarity(job_desc, project)
            project_score = semantic_score * 50  # Max 50 points from semantic similarity

        project_scores.append(project_score)

    # Blend the best projects, giving more weight to the strongest ones.
    project_scores.sort(reverse=True)
    if len(project_scores) == 1:
        total_project_score = project_scores[0]
    elif len(project_scores) == 2:
        total_project_score = project_scores[0] * 0.7 + project_scores[1] * 0.3
    else:
        # For 3+ projects, weight the top 3
        total_project_score = (project_scores[0] * 0.5 +
                               project_scores[1] * 0.3 +
                               project_scores[2] * 0.2)

    return min(100, total_project_score)
762
+
763
def score_keywords_match(self, job_desc, resume):
    """Score the overlap between job keywords and resume keywords.

    Keywords are extracted from both texts for the detected job domain and
    de-duplicated before comparing, so a keyword repeated in the job
    description counts once in both the match count and the denominator
    (the same convention ``score_projects`` uses).

    Args:
        job_desc: Job description text.
        resume: Resume text.

    Returns:
        A score capped at 100. When the job description yields no keywords
        the semantic similarity is used instead (capped at 70); when no
        keyword matches it is used with a lower cap of 35.
    """
    job_domain = self.detect_job_domain(job_desc)

    job_keywords = set(self.extract_contextual_keywords(job_desc, job_domain))
    resume_keywords = set(self.extract_contextual_keywords(resume, job_domain))

    if not job_keywords:
        # More generous fallback using semantic similarity
        return min(70, self.calculate_semantic_similarity(job_desc, resume) * 140)

    matches = len(job_keywords & resume_keywords)

    if matches == 0:
        # Give more credit for semantic similarity even with no exact matches
        return min(35, self.calculate_semantic_similarity(job_desc, resume) * 80)

    # Apply domain compatibility with more generous scoring
    resume_domain = self.detect_resume_domain(resume)
    domain_compatibility = self.calculate_domain_compatibility(job_domain, resume_domain)

    # Divide by the number of distinct job keywords so the ratio can
    # actually reach 1.0 when every distinct keyword is matched.
    base_score = (matches / len(job_keywords)) * 120
    final_score = base_score * (0.7 + 0.3 * domain_compatibility)

    return min(100, final_score)
788
+
789
def score_tools_tech(self, job_desc, resume):
    """Score how many of the job's tools and technologies the resume covers.

    The detected job domain selects which groups of
    ``self.skill_categories`` are searched; a technology counts as present
    when it occurs as a substring of the lower-cased text.

    Args:
        job_desc: Job description text.
        resume: Resume text.

    Returns:
        A score capped at 100. Fallbacks: semantic similarity (capped at
        60) when the job names no known technology; 8 points per resume
        technology (capped at 40) or a flat 15 when nothing overlaps.
    """
    tech_categories_by_domain = {
        'cybersecurity': ['cybersecurity', 'programming'],
        'web_development': ['web_development', 'programming', 'databases', 'cloud'],
        'mobile_development': ['mobile_development', 'programming'],
        'data_science': ['data_science', 'programming', 'databases'],
        'ui_ux_design': ['ui_ux_design', 'web_development'],
        'business_analysis': ['business_analysis', 'databases'],
        'marketing': ['marketing', 'ui_ux_design'],
        'consultancy': ['consultancy', 'business_analysis'],
        'ai_ml_engineering': ['ai_ml_engineering', 'data_science', 'programming'],
    }

    # Select relevant tech categories based on job domain
    job_domain = self.detect_job_domain(job_desc)
    tech_categories = tech_categories_by_domain.get(
        job_domain, ['programming', 'databases', 'cloud']
    )

    # Lower-case once; the texts do not change while scanning.
    job_lower = job_desc.lower()
    resume_lower = resume.lower()

    job_tech = set()
    resume_tech = set()
    for category in tech_categories:
        for tech in self.skill_categories.get(category, ()):
            if tech in job_lower:
                job_tech.add(tech)
            if tech in resume_lower:
                resume_tech.add(tech)

    if not job_tech:
        # More generous fallback using semantic similarity
        return min(60, self.calculate_semantic_similarity(job_desc, resume) * 120)

    matches = len(job_tech & resume_tech)

    if matches == 0:
        # Give more credit for having any relevant tech
        if resume_tech:
            return min(40, len(resume_tech) * 8)
        return 15  # Small base score instead of 0

    # The resume domain is only needed on this path, so it is detected here
    # rather than before the early returns.
    resume_domain = self.detect_resume_domain(resume)
    domain_compatibility = self.calculate_domain_compatibility(job_domain, resume_domain)

    base_score = (matches / len(job_tech)) * 120
    final_score = base_score * (0.7 + 0.3 * domain_compatibility)

    return min(100, final_score)
846
+
847
def score_soft_skills(self, job_desc, resume):
    """Score soft skills from explicit mentions, inferred traits and activities.

    Three parts are added together and capped at 100:

    * direct: soft skills from ``self.skill_categories['soft_skills']``
      found in the resume, relative to those found in the job description;
    * inferred: traits suggested by interests and activities
      (``self.interest_skill_mapping``) that the job description asks for;
    * bonus: 3 points per high-value activity mentioned, up to 15.

    All matching is substring matching on lower-cased text.
    """
    resume_lower = resume.lower()
    job_lower = job_desc.lower()

    def count_present(terms, text):
        return len([term for term in terms if term in text])

    # --- Direct soft skills -------------------------------------------
    soft_vocabulary = self.skill_categories['soft_skills']
    wanted = count_present(soft_vocabulary, job_lower)
    offered = count_present(soft_vocabulary, resume_lower)
    if wanted > 0:
        direct_score = min(50, (offered / wanted) * 50)
    else:
        direct_score = min(40, offered * 10)

    # --- Traits inferred from interests and activities ----------------
    inferred_skills = {
        trait
        for trait, indicators in self.interest_skill_mapping.items()
        if any(indicator in resume_lower for indicator in indicators)
    }

    # Words in the job description that signal a demand for each trait.
    requirement_triggers = (
        ('leadership', ('leadership', 'lead', 'manage')),
        ('teamwork', ('team', 'collaboration')),
        ('communication', ('communication', 'present')),
        ('creativity', ('creative', 'innovation', 'design')),
        ('analytical', ('problem', 'analytical', 'analysis')),
        ('dedication', ('dedicated', 'commitment')),
        ('adaptability', ('adapt', 'flexible')),
    )
    job_skill_requirements = {
        trait
        for trait, triggers in requirement_triggers
        if any(trigger in job_lower for trigger in triggers)
    }

    if not job_skill_requirements:
        # Nothing specific requested: reward any inferred trait a little.
        inferred_score = min(25, len(inferred_skills) * 5)
    else:
        satisfied = job_skill_requirements & inferred_skills
        if satisfied:
            inferred_score = (len(satisfied) / len(job_skill_requirements)) * 35
        else:
            inferred_score = 0

    # --- Activity-based bonus -----------------------------------------
    high_value_activities = ['ncc', 'captain', 'president', 'volunteer', 'community service', 'marathon', 'debate']
    activity_bonus = min(15, count_present(high_value_activities, resume_lower) * 3)

    return min(100, direct_score + inferred_score + activity_bonus)
906
+
907
def calculate_final_score(self, job_description, resume):
    """Run every dimension scorer and combine the results with ``self.weights``.

    Returns:
        A ``(final_score, scores)`` tuple, where ``scores`` maps each
        dimension name to its individual score and ``final_score`` is the
        sum of each score multiplied by its weight.
    """
    # Each dimension ``name`` is scored by the method ``score_<name>``.
    dimensions = (
        'relevant_skills',
        'work_experience',
        'education',
        'certifications',
        'projects',
        'keywords_match',
        'tools_tech',
        'soft_skills',
    )

    scores = {}
    for dimension in dimensions:
        score_dimension = getattr(self, 'score_' + dimension)
        scores[dimension] = score_dimension(job_description, resume)

    final_score = sum(
        value * self.weights[dimension] for dimension, value in scores.items()
    )
    return final_score, scores
925
+
926
# Module-level scorer instance; score_resume below reads it as a global
# for every scoring request.
scorer = ATSScorer()
928
+
929
def score_resume(job_description, resume_file, resume_text):
    """Score a resume against a job description for the Gradio UI.

    The resume is taken from ``resume_file`` when one is given, otherwise
    from ``resume_text``.

    Args:
        job_description: Job description text; ``None`` is treated as empty.
        resume_file: Uploaded resume file as handed over by the file input,
            or ``None``.
        resume_text: Pasted resume text; ``None`` is treated as empty.

    Returns:
        A ``(markdown, table)`` pair. On success ``markdown`` is the score
        report and ``table`` a ``pandas.DataFrame`` with one row per
        dimension. On any validation or processing problem ``markdown`` is
        the message to show and ``table`` is ``None``.
    """
    # The table output is a Dataframe component: a string there is read as a
    # CSV path, so problems are reported with None (rendered as empty).
    job_description = job_description or ""
    resume_text = resume_text or ""

    if not job_description.strip():
        return "Please provide a job description.", None

    # Determine resume source
    resume_content = ""
    if resume_file is not None:
        try:
            resume_content = scorer.extract_text_from_file(resume_file)
            if not resume_content.strip():
                return "Could not extract text from the uploaded file. Please check the file format.", None
        except Exception as e:
            return f"Error processing file: {str(e)}", None
    elif resume_text.strip():
        resume_content = resume_text.strip()
    else:
        return "Please provide either a resume file (PDF/DOCX) or paste resume text.", None

    # (score key, label in the report, label in the table, weight in percent)
    dimensions = (
        ('relevant_skills', 'Relevant Skills', 'Relevant Skills', 25),
        ('work_experience', 'Work Experience', 'Work Experience', 20),
        ('education', 'Education', 'Education', 10),
        ('certifications', 'Certifications & Courses', 'Certifications', 7),
        ('projects', 'Projects', 'Projects', 10),
        ('keywords_match', 'Keywords Match', 'Keywords Match', 10),
        ('tools_tech', 'Tools & Technologies', 'Tools & Tech', 10),
        ('soft_skills', 'Soft Skills Indicators', 'Soft Skills', 8),
    )

    # Advice shown when the given dimension scores below 70, in display order.
    low_score_advice = (
        ('relevant_skills', "- **Skills**: Add more job-specific technical skills to your resume"),
        ('work_experience', "- **Experience**: Highlight more relevant work experience or projects"),
        ('keywords_match', "- **Keywords**: Include more job-specific keywords throughout your resume"),
        ('tools_tech', "- **Technology**: Emphasize technical tools and technologies mentioned in the job description"),
        ('projects', "- **Projects**: Add more relevant projects that demonstrate required skills and use job-specific technologies"),
    )

    try:
        final_score, dimension_scores = scorer.calculate_final_score(job_description, resume_content)

        # Detect domains for additional context
        job_domain = scorer.detect_job_domain(job_description)
        resume_domain = scorer.detect_resume_domain(resume_content)
        domain_compatibility = scorer.calculate_domain_compatibility(job_domain, resume_domain)

        # Create detailed breakdown
        report_lines = [
            "",
            f"## Overall ATS Score: {final_score:.1f}/100",
            "",
            "### Domain Analysis:",
            f"- **Job Domain**: {job_domain.replace('_', ' ').title()}",
            f"- **Resume Domain**: {resume_domain.replace('_', ' ').title()}",
            f"- **Domain Compatibility**: {domain_compatibility:.1%}",
            "",
            "### Dimension Breakdown:",
        ]
        report_lines.extend(
            f"- **{label}** ({weight}%): {dimension_scores[key]:.1f}/100"
            for key, label, _, weight in dimensions
        )
        report_lines.extend([
            "",
            "### Score Interpretation:",
            "- **90-100**: Excellent match",
            "- **76-89**: Very good match",
            "- **56-75**: Good match",
            "- **45-55**: Fair match",
            # Lower bound lines up with the band above (no 40-44 gap).
            "- **Below 45**: Poor match",
            "",
            "### Recommendations:",
            "",
        ])
        breakdown = "\n".join(report_lines)

        # Add recommendations based on low scores and domain mismatch
        recommendations = []

        if domain_compatibility < 0.5:
            recommendations.append(f"- **Domain Mismatch**: Your resume appears to be focused on {resume_domain.replace('_', ' ')} while the job is in {job_domain.replace('_', ' ')}. Consider highlighting transferable skills.")

        recommendations.extend(
            advice for key, advice in low_score_advice if dimension_scores[key] < 70
        )

        if not recommendations:
            recommendations.append("- **Excellent!** Your resume is well-aligned with the job requirements")

        breakdown += "\n".join(recommendations)

        # Create score chart data
        chart_data = pd.DataFrame({
            'Dimension': [chart_label for _, _, chart_label, _ in dimensions],
            'Score': [dimension_scores[key] for key, _, _, _ in dimensions],
            'Weight (%)': [weight for _, _, _, weight in dimensions],
        })

        return breakdown, chart_data

    except Exception as e:
        # UI boundary: show the failure in the report pane instead of crashing.
        return f"Error processing resume: {str(e)}", None
1031
+
1032
# Example inputs shown under the form. Each pair is a job description and a
# matching resume; the text is kept flush-left because it is passed to the
# scorer exactly as written.
_EXAMPLE_FRONTEND_JOB = """Frontend Developer - React.js
We are seeking a Frontend Developer with 2+ years of experience in React.js development.
Requirements:
- Bachelor's degree in Computer Science or related field
- Strong proficiency in JavaScript, HTML, CSS
- Experience with React.js, Redux, and modern frontend frameworks
- Knowledge of responsive design and cross-browser compatibility
- Experience with version control (Git)
- Understanding of RESTful APIs
- Strong problem-solving skills and attention to detail"""

_EXAMPLE_FRONTEND_RESUME = """John Smith
Frontend Developer

Education:
- Bachelor of Technology in Computer Science, ABC University (2020)

Experience:
- Frontend Developer at Tech Solutions (2021-2024, 3 years)
- Developed responsive web applications using React.js and Redux
- Collaborated with backend developers to integrate RESTful APIs
- Implemented modern CSS frameworks and ensured cross-browser compatibility

Skills:
- Frontend: JavaScript, HTML5, CSS3, React.js, Redux, Vue.js
- Tools: Git, Webpack, npm, VS Code
- Responsive Design, Cross-browser compatibility
- RESTful API integration

Projects:
- E-commerce Website: Built using React.js with Redux for state management
- Portfolio Dashboard: Responsive web application with modern UI/UX"""

_EXAMPLE_DESIGN_JOB = """UI/UX Designer - Product Design
We are seeking a UI/UX Designer with 2+ years of experience in product design and user research.
Requirements:
- Bachelor's degree in Design, HCI, or related field
- Strong proficiency in Figma, Sketch, and Adobe Creative Suite
- Experience with user research and usability testing
- Knowledge of design systems and prototyping
- Understanding of frontend technologies (HTML, CSS, JavaScript)
- Strong visual design and interaction design skills
- Experience with A/B testing and data-driven design
- Excellent communication and collaboration skills"""

_EXAMPLE_DESIGN_RESUME = """Sarah Johnson
UI/UX Designer

Education:
- Bachelor of Fine Arts in Graphic Design, Art Institute (2020)

Experience:
- UI/UX Designer at Design Studio (2021-2024, 3 years)
- Created user interfaces and experiences for web and mobile applications
- Conducted user research and usability testing sessions
- Developed design systems and component libraries using Figma
- Collaborated with frontend developers on implementation

Skills:
- Design Tools: Figma, Sketch, Adobe XD, Photoshop, Illustrator
- Prototyping: InVision, Principle, Framer
- Research: User interviews, A/B testing, Analytics
- Frontend: HTML, CSS, JavaScript basics
- Design: Visual design, Interaction design, Wireframing

Projects:
- E-commerce Mobile App: Designed complete user experience with user research and prototyping
- SaaS Dashboard Redesign: Led design system creation and improved user engagement by 40%

Certifications:
- Google UX Design Certificate
- Figma Advanced Certification"""

# Create Gradio interface
with gr.Blocks(title="ATS Resume Scorer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🎯 ATS Resume Scorer

    This tool evaluates how well a resume matches a job description using 8 key dimensions:
    - **Relevant Skills** (25%) - Match of skills to job requirements
    - **Work Experience** (20%) - Years and relevance of experience
    - **Education** (10%) - Degree relevance and performance
    - **Certifications & Courses** (7%) - Additional qualifications
    - **Projects** (10%) - Quality and relevance of projects
    - **Keywords Match** (10%) - Job-specific keyword alignment
    - **Tools & Technologies** (10%) - Technical proficiency
    - **Soft Skills** (8%) - Leadership, teamwork, communication

    **Supported Domains:** Web Development, Mobile Development, Data Science, Cybersecurity, DevOps, Game Development, UI/UX Design, Business Analysis, Marketing, Consultancy, AI/ML Engineering

    **📄 Resume Input:** Upload PDF/DOCX file OR paste text manually
    **📋 Job Description:** Paste as text
    """)

    with gr.Row():
        with gr.Column():
            job_desc_input = gr.Textbox(
                label="📋 Job Description",
                placeholder="Paste the complete job description here...",
                lines=12,
                max_lines=20
            )

        with gr.Column():
            gr.Markdown("### 📄 Resume Input")

            with gr.Tab("Upload File (PDF/DOCX)"):
                resume_file_input = gr.File(
                    label="Upload Resume",
                    file_types=[".pdf", ".docx", ".doc"],
                    type="filepath"
                )
                gr.Markdown("*Supported formats: PDF, DOCX, DOC*")

            with gr.Tab("Paste Text"):
                resume_text_input = gr.Textbox(
                    label="Resume Text",
                    placeholder="Or paste your resume text here...",
                    lines=10,
                    max_lines=15
                )

    score_btn = gr.Button("📊 Score Resume", variant="primary", size="lg")

    with gr.Row():
        with gr.Column():
            score_output = gr.Markdown(label="Scoring Results")

        with gr.Column():
            chart_output = gr.Dataframe(
                label="Dimension Scores",
                headers=['Dimension', 'Score', 'Weight (%)'],
                datatype=['str', 'number', 'number']
            )

    # Example inputs; the middle value is None because the examples use
    # pasted text rather than a file upload.
    gr.Examples(
        examples=[
            [_EXAMPLE_FRONTEND_JOB, None, _EXAMPLE_FRONTEND_RESUME],
            [_EXAMPLE_DESIGN_JOB, None, _EXAMPLE_DESIGN_RESUME],
        ],
        inputs=[job_desc_input, resume_file_input, resume_text_input]
    )

    score_btn.click(
        fn=score_resume,
        inputs=[job_desc_input, resume_file_input, resume_text_input],
        outputs=[score_output, chart_output]
    )

if __name__ == "__main__":
    demo.launch()