KavyaBansal committed on
Commit
914198f
·
verified ·
1 Parent(s): 69220e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +495 -800
app.py CHANGED
@@ -11,6 +11,12 @@ import PyPDF2
11
  import docx
12
  import io
13
  from pathlib import Path
 
 
 
 
 
 
14
 
15
  class ATSScorer:
16
  def __init__(self):
@@ -195,6 +201,87 @@ class ATSScorer:
195
  ]
196
  }
197
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  def extract_text_from_pdf(self, pdf_file):
199
  """Extract text from PDF file"""
200
  try:
@@ -222,14 +309,14 @@ class ATSScorer:
222
  if file is None:
223
  return ""
224
 
225
- file_path = Path(file.name)
226
  file_extension = file_path.suffix.lower()
227
 
228
  try:
229
  if file_extension == '.pdf':
230
- return self.extract_text_from_pdf(file.name)
231
  elif file_extension in ['.docx', '.doc']:
232
- return self.extract_text_from_docx(file.name)
233
  else:
234
  raise Exception(f"Unsupported file format: {file_extension}. Please upload PDF or DOCX files.")
235
  except Exception as e:
@@ -237,699 +324,427 @@ class ATSScorer:
237
 
238
  def preprocess_text(self, text):
239
  """Clean and preprocess text"""
240
- if not text:
241
- return ""
242
- text = text.lower().strip()
243
  # Remove extra whitespace
244
  text = re.sub(r'\s+', ' ', text)
245
- return text
246
-
247
- def detect_job_domain(self, job_desc):
248
- """Detect the primary domain of the job with improved priority-based scoring"""
249
- job_lower = job_desc.lower()
250
-
251
- domain_scores = {}
252
-
253
- for domain, indicators in self.domain_indicators.items():
254
- score = 0
255
-
256
- # High priority indicators (job titles, specific roles) - weight 10
257
- for indicator in indicators['high_priority']:
258
- if indicator in job_lower:
259
- score += 10
260
-
261
- # Medium priority indicators (domain-specific terms) - weight 3
262
- for indicator in indicators['medium_priority']:
263
- if indicator in job_lower:
264
- score += 3
265
-
266
- # Low priority indicators (tools, technologies) - weight 1
267
- for indicator in indicators['low_priority']:
268
- if indicator in job_lower:
269
- score += 1
270
-
271
- domain_scores[domain] = score
272
-
273
- # Return the domain with highest score, or 'general' if no matches
274
- if max(domain_scores.values()) > 0:
275
- return max(domain_scores, key=domain_scores.get)
276
- else:
277
- return 'general'
278
-
279
- def detect_resume_domain(self, resume):
280
- """Detect the primary domain of the resume with improved priority-based scoring"""
281
- resume_lower = resume.lower()
282
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
  domain_scores = {}
284
-
285
- for domain, indicators in self.domain_indicators.items():
286
  score = 0
287
-
288
- # High priority indicators (job titles, specific roles) - weight 10
289
- for indicator in indicators['high_priority']:
290
- if indicator in resume_lower:
291
- score += 10
292
-
293
- # Medium priority indicators (domain-specific terms) - weight 3
294
- for indicator in indicators['medium_priority']:
295
- if indicator in resume_lower:
296
  score += 3
297
-
298
- # Low priority indicators (tools, technologies) - weight 1
299
- for indicator in indicators['low_priority']:
300
- if indicator in resume_lower:
 
 
 
301
  score += 1
302
-
303
  domain_scores[domain] = score
304
-
305
- # Return the domain with highest score, or 'general' if no matches
306
- if max(domain_scores.values()) > 0:
307
  return max(domain_scores, key=domain_scores.get)
308
- else:
309
- return 'general'
310
-
311
- def calculate_domain_compatibility(self, job_domain, resume_domain):
312
- """Calculate compatibility between job and resume domains"""
313
- if job_domain == resume_domain:
314
- return 1.0
315
-
316
- # More generous domain compatibility matrix
317
- compatibility_matrix = {
318
- ('cybersecurity', 'web_development'): 0.7,
319
- ('cybersecurity', 'mobile_development'): 0.6,
320
- ('cybersecurity', 'data_science'): 0.8,
321
- ('cybersecurity', 'ai_ml_engineering'): 0.8,
322
- ('web_development', 'mobile_development'): 0.9,
323
- ('web_development', 'data_science'): 0.8,
324
- ('web_development', 'ui_ux_design'): 0.9,
325
- ('mobile_development', 'data_science'): 0.7,
326
- ('mobile_development', 'ui_ux_design'): 0.8,
327
- ('devops', 'web_development'): 0.8,
328
- ('devops', 'cybersecurity'): 0.7,
329
- ('devops', 'ai_ml_engineering'): 0.8,
330
- ('game_development', 'web_development'): 0.7,
331
- ('game_development', 'mobile_development'): 0.8,
332
- ('ui_ux_design', 'web_development'): 0.9,
333
- ('ui_ux_design', 'mobile_development'): 0.8,
334
- ('ui_ux_design', 'marketing'): 0.7,
335
- ('business_analysis', 'consultancy'): 0.9,
336
- ('business_analysis', 'marketing'): 0.7,
337
- ('business_analysis', 'data_science'): 0.7,
338
- ('marketing', 'consultancy'): 0.8,
339
- ('marketing', 'business_analysis'): 0.7,
340
- ('marketing', 'ui_ux_design'): 0.7,
341
- ('consultancy', 'business_analysis'): 0.9,
342
- ('consultancy', 'marketing'): 0.8,
343
- ('ai_ml_engineering', 'data_science'): 0.95,
344
- ('ai_ml_engineering', 'web_development'): 0.8,
345
- ('ai_ml_engineering', 'cybersecurity'): 0.8,
346
- ('data_science', 'ai_ml_engineering'): 0.95,
347
- }
348
-
349
- # Check both directions
350
- compatibility = compatibility_matrix.get((job_domain, resume_domain),
351
- compatibility_matrix.get((resume_domain, job_domain), 0.5))
352
-
353
- return compatibility
354
 
355
- def extract_years_of_experience(self, text):
356
  """Extract years of experience from text"""
357
- text = text.lower()
 
 
358
  patterns = [
359
- r'(\d+)\+?\s*years?\s+(?:of\s+)?experience',
360
- r'(\d+)\+?\s*yrs?\s+(?:of\s+)?experience',
361
- r'experience.*?(\d+)\+?\s*years?',
362
- r'(\d+)\+?\s*years?\s+in\s+'
 
 
363
  ]
364
-
365
  years = []
366
  for pattern in patterns:
367
  matches = re.findall(pattern, text)
368
  years.extend([int(match) for match in matches])
369
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  return max(years) if years else 0
371
 
372
- def extract_contextual_keywords(self, text, job_domain="general"):
373
- """Extract keywords with domain context awareness"""
374
- text = self.preprocess_text(text)
375
- keywords = set()
376
-
377
- # Get relevant categories based on domain
378
- relevant_categories = []
379
- if job_domain == 'cybersecurity':
380
- relevant_categories = ['cybersecurity', 'programming']
381
- elif job_domain == 'web_development':
382
- relevant_categories = ['web_development', 'programming', 'databases']
383
- elif job_domain == 'mobile_development':
384
- relevant_categories = ['mobile_development', 'programming']
385
- elif job_domain == 'data_science':
386
- relevant_categories = ['data_science', 'programming', 'databases']
387
- elif job_domain == 'ui_ux_design':
388
- relevant_categories = ['ui_ux_design', 'web_development']
389
- elif job_domain == 'business_analysis':
390
- relevant_categories = ['business_analysis', 'databases']
391
- elif job_domain == 'marketing':
392
- relevant_categories = ['marketing', 'ui_ux_design']
393
- elif job_domain == 'consultancy':
394
- relevant_categories = ['consultancy', 'business_analysis']
395
- elif job_domain == 'ai_ml_engineering':
396
- relevant_categories = ['ai_ml_engineering', 'data_science', 'programming']
397
- else:
398
- relevant_categories = ['programming', 'databases', 'cloud', 'web_development']
399
-
400
- # Extract keywords from relevant categories
401
- for category in relevant_categories:
402
- if category in self.skill_categories:
403
- for skill in self.skill_categories[category]:
404
- if skill in text:
405
- keywords.add(skill)
406
-
407
- # Use spaCy for entity extraction if available
408
- if self.nlp:
409
- doc = self.nlp(text)
410
- for ent in doc.ents:
411
- if ent.label_ in ['ORG', 'PRODUCT', 'LANGUAGE']:
412
- keywords.add(ent.text.lower())
413
-
414
- return list(keywords)
415
-
416
- def calculate_semantic_similarity(self, text1, text2):
417
- """Calculate semantic similarity between two texts with lower threshold"""
418
- if not text1 or not text2:
419
- return 0.0
420
-
421
- embeddings = self.sentence_model.encode([text1, text2])
422
- similarity = cosine_similarity([embeddings[0]], [embeddings[1]])[0][0]
423
-
424
- # Lower threshold for more inclusive matching
425
- if similarity < 0.15:
426
- return 0.0
427
-
428
- return max(0, similarity)
429
-
430
- def score_relevant_skills(self, job_desc, resume):
431
- """Score skill relevance with more generous scoring"""
432
- job_domain = self.detect_job_domain(job_desc)
433
- resume_domain = self.detect_resume_domain(resume)
434
-
435
- job_keywords = set(self.extract_contextual_keywords(job_desc, job_domain))
436
- resume_keywords = set(self.extract_contextual_keywords(resume, job_domain))
437
-
438
- if not job_keywords:
439
- # More generous fallback using semantic similarity
440
- semantic_score = self.calculate_semantic_similarity(job_desc, resume) * 120
441
- return min(80, semantic_score)
442
-
443
- # Exact keyword matching
444
- exact_matches = len(job_keywords.intersection(resume_keywords))
445
- exact_score = exact_matches / len(job_keywords)
446
-
447
- # Semantic similarity with higher weight
448
- semantic_score = self.calculate_semantic_similarity(job_desc, resume)
449
-
450
- # More generous base scoring
451
- base_score = (exact_score * 0.6 + semantic_score * 0.4) * 120
452
-
453
- # Apply domain compatibility with minimal penalty
454
- domain_compatibility = self.calculate_domain_compatibility(job_domain, resume_domain)
455
- final_score = base_score * (0.7 + 0.3 * domain_compatibility) # Minimum 70% of base score
456
-
457
- return min(100, final_score)
458
-
459
- def score_work_experience(self, job_desc, resume):
460
- """Score work experience with more generous scoring"""
461
- resume_years = self.extract_years_of_experience(resume)
462
- job_years = self.extract_years_of_experience(job_desc)
463
-
464
- job_domain = self.detect_job_domain(job_desc)
465
- resume_domain = self.detect_resume_domain(resume)
466
-
467
- # Years of experience score
468
- if job_years > 0:
469
- years_score = min(100, (resume_years / job_years) * 120)
470
- else:
471
- years_score = 60 if resume_years > 0 else 20
472
-
473
- # Domain-aware semantic similarity
474
- semantic_score = self.calculate_semantic_similarity(job_desc, resume) * 120
475
-
476
- # Apply domain compatibility
477
- domain_compatibility = self.calculate_domain_compatibility(job_domain, resume_domain)
478
-
479
- # Combine scores with more generous weighting
480
- base_score = (years_score * 0.4 + semantic_score * 0.6)
481
- final_score = base_score * (0.7 + 0.3 * domain_compatibility)
482
-
483
- return min(100, final_score)
484
-
485
- def score_education(self, job_desc, resume):
486
- """Score education relevance - Enhanced for undergraduates"""
487
  resume_lower = resume.lower()
488
- job_lower = job_desc.lower()
489
-
490
- # Extract required degree from job description
491
- required_degrees = []
492
- for degree_type in self.education_patterns['degree_types']:
493
- if degree_type in job_lower:
494
- required_degrees.append(degree_type)
495
-
496
- # Check if candidate is undergraduate
497
- is_undergraduate = any(pattern in resume_lower for pattern in self.education_patterns['undergraduate'])
498
-
499
- # Determine candidate's year if undergraduate
500
- year_score_multiplier = 1.0
501
- if is_undergraduate:
502
- if any(year in resume_lower for year in ['final year', 'fourth year', 'senior']):
503
- year_score_multiplier = 0.95
504
- elif any(year in resume_lower for year in ['third year', 'junior']):
505
- year_score_multiplier = 0.85
506
- elif any(year in resume_lower for year in ['second year', 'sophomore']):
507
- year_score_multiplier = 0.70
508
- elif any(year in resume_lower for year in ['first year', 'freshman']):
509
- year_score_multiplier = 0.55
510
-
511
- # Check degree match with more generous scoring
512
- degree_match_score = 0
513
- if required_degrees:
514
- candidate_degrees = []
515
- for degree_type in self.education_patterns['degree_types']:
516
- if degree_type in resume_lower:
517
- candidate_degrees.append(degree_type)
518
-
519
- if candidate_degrees:
520
- if any(req_deg in candidate_degrees for req_deg in required_degrees):
521
- degree_match_score = 85
522
- elif any(deg in ['btech', 'be', 'bs', 'bachelor'] for deg in candidate_degrees) and \
523
- any(deg in ['bachelor', 'btech', 'be', 'bs'] for deg in required_degrees):
524
- degree_match_score = 80
525
- elif any(deg in ['master', 'ms', 'ma', 'mtech', 'mba'] for deg in candidate_degrees) and \
526
- any(deg in ['bachelor', 'btech', 'be', 'bs'] for deg in required_degrees):
527
- degree_match_score = 90
528
- else:
529
- degree_match_score = 50
530
- else:
531
- degree_match_score = 20
532
  else:
533
- education_present = any(keyword in resume_lower for keyword in self.education_keywords)
534
- degree_match_score = 60 if education_present else 20
535
 
536
- # Apply undergraduate multiplier
537
- if is_undergraduate and degree_match_score > 0:
538
- degree_match_score *= year_score_multiplier
539
-
540
- # Higher semantic similarity bonus
541
- semantic_bonus = self.calculate_semantic_similarity(job_desc, resume) * 20
542
-
543
- final_score = min(100, degree_match_score + semantic_bonus)
544
- return final_score
545
-
546
- def score_certifications(self, job_desc, resume):
547
- """Score certifications and courses (7% weight)"""
548
  resume_lower = resume.lower()
549
- job_lower = job_desc.lower()
550
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
551
  # Check for certification keywords
552
- cert_count = sum(1 for keyword in self.certification_keywords if keyword in resume_lower)
553
-
554
- # Return 0 if no certifications found
555
- if cert_count == 0:
556
- return 0
557
-
558
- # Check for domain-specific certifications
559
- job_domain = self.detect_job_domain(job_desc)
 
 
 
 
 
 
 
 
 
 
560
  domain_cert_bonus = 0
561
-
562
- if job_domain == 'cybersecurity':
563
- cyber_certs = ['oscp', 'cissp', 'ceh', 'giac', 'sans', 'security+']
564
- domain_cert_bonus = sum(20 for cert in cyber_certs if cert in resume_lower)
565
- elif job_domain == 'web_development':
566
- web_certs = ['aws certified', 'google cloud', 'azure certified', 'mongodb certified']
567
- domain_cert_bonus = sum(15 for cert in web_certs if cert in resume_lower)
568
- elif job_domain == 'data_science':
569
- data_certs = ['tensorflow developer', 'aws machine learning', 'google data engineer', 'microsoft azure ai']
570
- domain_cert_bonus = sum(15 for cert in data_certs if cert in resume_lower)
571
- elif job_domain == 'ui_ux_design':
572
- design_certs = ['adobe certified', 'figma certified', 'ux certification', 'design thinking', 'google ux']
573
- domain_cert_bonus = sum(15 for cert in design_certs if cert in resume_lower)
574
- elif job_domain == 'business_analysis':
575
- ba_certs = ['cbap', 'ccba', 'pmp', 'agile certified', 'scrum master', 'business analysis']
576
- domain_cert_bonus = sum(15 for cert in ba_certs if cert in resume_lower)
577
- elif job_domain == 'marketing':
578
- marketing_certs = ['google ads', 'facebook blueprint', 'hubspot', 'google analytics', 'digital marketing']
579
- domain_cert_bonus = sum(15 for cert in marketing_certs if cert in resume_lower)
580
- elif job_domain == 'consultancy':
581
- consulting_certs = ['pmp', 'prince2', 'change management', 'lean six sigma', 'agile certified']
582
- domain_cert_bonus = sum(15 for cert in consulting_certs if cert in resume_lower)
583
- elif job_domain == 'ai_ml_engineering':
584
- ai_certs = ['tensorflow developer', 'aws machine learning', 'google cloud ml', 'nvidia deep learning', 'microsoft ai']
585
- domain_cert_bonus = sum(15 for cert in ai_certs if cert in resume_lower)
586
-
587
- # More generous base score for having certifications
588
- base_score = min(60, cert_count * 25)
589
-
590
- # Relevance to job description
591
- relevance_score = self.calculate_semantic_similarity(job_desc, resume) * 30
592
-
593
- return min(100, base_score + relevance_score + domain_cert_bonus)
594
-
595
- def classify_project_category(self, project_text):
596
- """Classify project into categories based on description"""
597
- project_lower = project_text.lower()
598
-
599
- category_scores = {}
600
  for category, keywords in self.project_categories.items():
601
- score = sum(1 for keyword in keywords if keyword in project_lower)
602
- if score > 0:
603
- category_scores[category] = score
604
-
605
- if not category_scores:
606
- return 'general'
607
-
608
- return max(category_scores, key=category_scores.get)
609
-
610
- def extract_project_keywords(self, project_text, job_domain):
611
- """Extract technical keywords from project description"""
612
- project_lower = project_text.lower()
613
- keywords = set()
614
-
615
- # Get relevant categories based on job domain
616
- relevant_categories = []
617
- if job_domain == 'cybersecurity':
618
- relevant_categories = ['cybersecurity', 'programming']
619
- elif job_domain == 'web_development':
620
- relevant_categories = ['web_development', 'programming', 'databases']
621
- elif job_domain == 'mobile_development':
622
- relevant_categories = ['mobile_development', 'programming']
623
- elif job_domain == 'data_science':
624
- relevant_categories = ['data_science', 'programming', 'databases']
625
- elif job_domain == 'ui_ux_design':
626
- relevant_categories = ['ui_ux_design', 'web_development']
627
- elif job_domain == 'business_analysis':
628
- relevant_categories = ['business_analysis', 'databases']
629
- elif job_domain == 'marketing':
630
- relevant_categories = ['marketing', 'ui_ux_design']
631
- elif job_domain == 'consultancy':
632
- relevant_categories = ['consultancy', 'business_analysis']
633
- elif job_domain == 'ai_ml_engineering':
634
- relevant_categories = ['ai_ml_engineering', 'data_science', 'programming']
635
- else:
636
- relevant_categories = ['programming', 'databases', 'cloud']
637
-
638
- # Extract keywords from relevant categories
639
- for category in relevant_categories:
640
- if category in self.skill_categories:
641
- for skill in self.skill_categories[category]:
642
- if skill in project_lower:
643
- keywords.add(skill)
644
-
645
- return keywords
646
-
647
- def score_projects(self, job_desc, resume):
648
- """Score projects with stricter keyword and category matching"""
649
  resume_lower = resume.lower()
650
- job_lower = job_desc.lower()
651
-
652
- job_domain = self.detect_job_domain(job_desc)
653
-
654
- # Extract job keywords for matching
655
- job_keywords = set(self.extract_contextual_keywords(job_desc, job_domain))
656
-
657
- # Find project sections
658
- project_sections = []
659
- lines = resume.split('\n')
660
- in_project_section = False
661
- current_project = ""
662
-
663
- for line in lines:
664
- line_lower = line.lower().strip()
665
- if any(keyword in line_lower for keyword in self.project_keywords):
666
- if current_project:
667
- project_sections.append(current_project)
668
- current_project = line
669
- in_project_section = True
670
- elif in_project_section:
671
- if line.strip() and not line.startswith('-') and not any(section_word in line_lower for section_word in ['experience', 'education', 'skills', 'certification']):
672
- current_project += " " + line
673
- elif line.strip().startswith('-') or not line.strip():
674
- current_project += " " + line
675
- else:
676
- if current_project:
677
- project_sections.append(current_project)
678
- current_project = ""
679
- in_project_section = False
680
-
681
- if current_project:
682
- project_sections.append(current_project)
683
-
684
- # If no projects found, return very low score
685
- if not project_sections:
686
- project_count = sum(1 for keyword in self.project_keywords if keyword in resume_lower)
687
- return 5 if project_count > 0 else 0
688
-
689
- # Analyze each project
690
- total_project_score = 0
691
- project_scores = []
692
-
693
- for project in project_sections:
694
- project_score = 0
695
-
696
- # Step 1: Direct keyword matching (highest priority)
697
- project_keywords = self.extract_project_keywords(project, job_domain)
698
- if job_keywords:
699
- keyword_matches = len(job_keywords.intersection(project_keywords))
700
- keyword_match_ratio = keyword_matches / len(job_keywords)
701
-
702
- if keyword_match_ratio >= 0.5: # 50% or more keywords match
703
- project_score = 80 + (keyword_match_ratio - 0.5) * 40 # 80-100 points
704
- elif keyword_match_ratio >= 0.3: # 30-49% keywords match
705
- project_score = 60 + (keyword_match_ratio - 0.3) * 100 # 60-80 points
706
- elif keyword_match_ratio >= 0.1: # 10-29% keywords match
707
- project_score = 30 + (keyword_match_ratio - 0.1) * 150 # 30-60 points
708
- elif keyword_matches > 0: # Some keywords match but less than 10%
709
- project_score = 20
710
- else:
711
- # Step 2: Category matching (if no keyword matches)
712
- project_category = self.classify_project_category(project)
713
-
714
- # Map project categories to job domains
715
- category_domain_mapping = {
716
- 'web_development': 'web_development',
717
- 'mobile_development': 'mobile_development',
718
- 'data_science': 'data_science',
719
- 'cybersecurity': 'cybersecurity',
720
- 'game_development': 'game_development',
721
- 'devops': 'devops',
722
- 'api_backend': 'web_development',
723
- 'desktop_application': 'general',
724
- 'ui_ux_design': 'ui_ux_design',
725
- 'business_analysis': 'business_analysis',
726
- 'marketing': 'marketing',
727
- 'ai_ml_engineering': 'ai_ml_engineering'
728
- }
729
-
730
- project_domain = category_domain_mapping.get(project_category, 'general')
731
-
732
- if project_domain == job_domain:
733
- project_score = 40 # Same domain but no keyword matches
734
- elif project_domain != 'general' and job_domain != 'general':
735
- # Check domain compatibility
736
- compatibility = self.calculate_domain_compatibility(job_domain, project_domain)
737
- project_score = 20 * compatibility # 0-20 points based on compatibility
738
- else:
739
- project_score = 10 # Very low score for unrelated projects
740
- else:
741
- # If no job keywords found, use semantic similarity as fallback
742
- semantic_score = self.calculate_semantic_similarity(job_desc, project)
743
- project_score = semantic_score * 50 # Max 50 points from semantic similarity
744
-
745
- project_scores.append(project_score)
746
-
747
- # Calculate final score based on best projects
748
- if project_scores:
749
- # Take average of all projects but give more weight to best projects
750
- project_scores.sort(reverse=True)
751
- if len(project_scores) == 1:
752
- total_project_score = project_scores[0]
753
- elif len(project_scores) == 2:
754
- total_project_score = (project_scores[0] * 0.7 + project_scores[1] * 0.3)
755
  else:
756
- # For 3+ projects, weight the top 3
757
- total_project_score = (project_scores[0] * 0.5 +
758
- project_scores[1] * 0.3 +
759
- project_scores[2] * 0.2)
760
-
761
- return min(100, total_project_score)
762
-
763
- def score_keywords_match(self, job_desc, resume):
764
- """Score keyword matching with more generous scoring"""
765
- job_domain = self.detect_job_domain(job_desc)
766
-
767
- job_keywords = self.extract_contextual_keywords(job_desc, job_domain)
768
- resume_keywords = self.extract_contextual_keywords(resume, job_domain)
769
-
770
- if not job_keywords:
771
- # More generous fallback using semantic similarity
772
- return min(70, self.calculate_semantic_similarity(job_desc, resume) * 140)
773
-
774
- matches = len(set(job_keywords).intersection(set(resume_keywords)))
775
-
776
- if matches == 0:
777
- # Give more credit for semantic similarity even with no exact matches
778
- return min(35, self.calculate_semantic_similarity(job_desc, resume) * 80)
779
-
780
- # Apply domain compatibility with more generous scoring
781
- resume_domain = self.detect_resume_domain(resume)
782
- domain_compatibility = self.calculate_domain_compatibility(job_domain, resume_domain)
783
-
784
- base_score = (matches / len(job_keywords)) * 120
785
- final_score = base_score * (0.7 + 0.3 * domain_compatibility)
786
-
787
- return min(100, final_score)
788
-
789
- def score_tools_tech(self, job_desc, resume):
790
- """Score tools and technologies with more generous scoring"""
791
- job_domain = self.detect_job_domain(job_desc)
792
- resume_domain = self.detect_resume_domain(resume)
793
-
794
- # Select relevant tech categories based on job domain
795
- if job_domain == 'cybersecurity':
796
- tech_categories = ['cybersecurity', 'programming']
797
- elif job_domain == 'web_development':
798
- tech_categories = ['web_development', 'programming', 'databases', 'cloud']
799
- elif job_domain == 'mobile_development':
800
- tech_categories = ['mobile_development', 'programming']
801
- elif job_domain == 'data_science':
802
- tech_categories = ['data_science', 'programming', 'databases']
803
- elif job_domain == 'ui_ux_design':
804
- tech_categories = ['ui_ux_design', 'web_development']
805
- elif job_domain == 'business_analysis':
806
- tech_categories = ['business_analysis', 'databases']
807
- elif job_domain == 'marketing':
808
- tech_categories = ['marketing', 'ui_ux_design']
809
- elif job_domain == 'consultancy':
810
- tech_categories = ['consultancy', 'business_analysis']
811
- elif job_domain == 'ai_ml_engineering':
812
- tech_categories = ['ai_ml_engineering', 'data_science', 'programming']
813
- else:
814
- tech_categories = ['programming', 'databases', 'cloud']
815
-
816
- job_tech = set()
817
- resume_tech = set()
818
-
819
- for category in tech_categories:
820
  if category in self.skill_categories:
821
- for tech in self.skill_categories[category]:
822
- if tech in job_desc.lower():
823
- job_tech.add(tech)
824
- if tech in resume.lower():
825
- resume_tech.add(tech)
826
-
827
- if not job_tech:
828
- # More generous fallback using semantic similarity
829
- return min(60, self.calculate_semantic_similarity(job_desc, resume) * 120)
830
-
831
- matches = len(job_tech.intersection(resume_tech))
832
-
833
- if matches == 0:
834
- # Give more credit for having any relevant tech
835
- if resume_tech:
836
- return min(40, len(resume_tech) * 8)
837
- return 15 # Small base score instead of 0
838
-
839
- # Apply domain compatibility with more generous scoring
840
- domain_compatibility = self.calculate_domain_compatibility(job_domain, resume_domain)
841
-
842
- base_score = (matches / len(job_tech)) * 120
843
- final_score = base_score * (0.7 + 0.3 * domain_compatibility)
844
-
845
- return min(100, final_score)
846
-
847
- def score_soft_skills(self, job_desc, resume):
848
- """Score soft skills indicators - Enhanced with interest-based inference"""
849
- resume_lower = resume.lower()
850
- job_lower = job_desc.lower()
851
-
852
- # Direct soft skills mentioned
853
- direct_soft_skills = self.skill_categories['soft_skills']
854
- job_soft_count = sum(1 for skill in direct_soft_skills if skill in job_lower)
855
- resume_soft_count = sum(1 for skill in direct_soft_skills if skill in resume_lower)
856
-
857
- # Calculate direct soft skills score
858
- if job_soft_count > 0:
859
- direct_score = min(50, (resume_soft_count / job_soft_count) * 50)
860
- else:
861
- direct_score = min(40, resume_soft_count * 10)
862
-
863
- # Inferred soft skills from interests and activities
864
- inferred_skills = set()
865
- resume_text = resume_lower
866
-
867
- for skill_type, indicators in self.interest_skill_mapping.items():
868
- skill_indicators_found = sum(1 for indicator in indicators if indicator in resume_text)
869
- if skill_indicators_found > 0:
870
- inferred_skills.add(skill_type)
871
-
872
- # Map inferred skills to job requirements
873
- job_skill_requirements = set()
874
- if 'leadership' in job_lower or 'lead' in job_lower or 'manage' in job_lower:
875
- job_skill_requirements.add('leadership')
876
- if 'team' in job_lower or 'collaboration' in job_lower:
877
- job_skill_requirements.add('teamwork')
878
- if 'communication' in job_lower or 'present' in job_lower:
879
- job_skill_requirements.add('communication')
880
- if 'creative' in job_lower or 'innovation' in job_lower or 'design' in job_lower:
881
- job_skill_requirements.add('creativity')
882
- if 'problem' in job_lower or 'analytical' in job_lower or 'analysis' in job_lower:
883
- job_skill_requirements.add('analytical')
884
- if 'dedicated' in job_lower or 'commitment' in job_lower:
885
- job_skill_requirements.add('dedication')
886
- if 'adapt' in job_lower or 'flexible' in job_lower:
887
- job_skill_requirements.add('adaptability')
888
-
889
- # Score inferred skills
890
- inferred_score = 0
891
- if job_skill_requirements:
892
- matched_inferred = job_skill_requirements.intersection(inferred_skills)
893
- if matched_inferred:
894
- inferred_score = (len(matched_inferred) / len(job_skill_requirements)) * 35
895
  else:
896
- inferred_score = min(25, len(inferred_skills) * 5)
897
-
898
- # Activity-based bonus scoring
899
- activity_bonus = 0
900
- high_value_activities = ['ncc', 'captain', 'president', 'volunteer', 'community service', 'marathon', 'debate']
901
- activity_count = sum(1 for activity in high_value_activities if activity in resume_lower)
902
- activity_bonus = min(15, activity_count * 3)
903
-
904
- final_score = min(100, direct_score + inferred_score + activity_bonus)
905
- return final_score
906
 
907
  def calculate_final_score(self, job_description, resume):
908
  """Calculate the weighted final score"""
909
  scores = {}
910
-
911
  # Calculate individual dimension scores
912
- scores['relevant_skills'] = self.score_relevant_skills(job_description, resume)
913
- scores['work_experience'] = self.score_work_experience(job_description, resume)
914
- scores['education'] = self.score_education(job_description, resume)
915
- scores['certifications'] = self.score_certifications(job_description, resume)
916
- scores['projects'] = self.score_projects(job_description, resume)
917
- scores['keywords_match'] = self.score_keywords_match(job_description, resume)
918
- scores['tools_tech'] = self.score_tools_tech(job_description, resume)
919
- scores['soft_skills'] = self.score_soft_skills(job_description, resume)
920
-
921
  # Calculate weighted final score
922
  final_score = sum(scores[dim] * self.weights[dim] for dim in scores)
923
-
924
  return final_score, scores
925
 
926
  # Initialize the scorer
927
  scorer = ATSScorer()
928
 
929
  def score_resume(job_description, resume_file, resume_text):
930
- """Main function to score resume against job description"""
931
  if not job_description.strip():
932
- return "Please provide a job description.", ""
933
 
934
  # Determine resume source
935
  resume_content = ""
@@ -937,31 +752,26 @@ def score_resume(job_description, resume_file, resume_text):
937
  try:
938
  resume_content = scorer.extract_text_from_file(resume_file)
939
  if not resume_content.strip():
940
- return "Could not extract text from the uploaded file. Please check the file format.", ""
941
  except Exception as e:
942
- return f"Error processing file: {str(e)}", ""
943
  elif resume_text.strip():
944
  resume_content = resume_text.strip()
945
  else:
946
- return "Please provide either a resume file (PDF/DOCX) or paste resume text.", ""
947
 
948
  try:
 
949
  final_score, dimension_scores = scorer.calculate_final_score(job_description, resume_content)
950
 
951
- # Detect domains for additional context
952
- job_domain = scorer.detect_job_domain(job_description)
953
- resume_domain = scorer.detect_resume_domain(resume_content)
954
- domain_compatibility = scorer.calculate_domain_compatibility(job_domain, resume_domain)
955
 
956
- # Create detailed breakdown
957
- breakdown = f"""
958
  ## Overall ATS Score: {final_score:.1f}/100
959
 
960
- ### Domain Analysis:
961
- - **Job Domain**: {job_domain.replace('_', ' ').title()}
962
- - **Resume Domain**: {resume_domain.replace('_', ' ').title()}
963
- - **Domain Compatibility**: {domain_compatibility:.1%}
964
-
965
  ### Dimension Breakdown:
966
  - **Relevant Skills** (25%): {dimension_scores['relevant_skills']:.1f}/100
967
  - **Work Experience** (20%): {dimension_scores['work_experience']:.1f}/100
@@ -978,32 +788,8 @@ def score_resume(job_description, resume_file, resume_text):
978
  - **56-75**: Good match
979
  - **45-55**: Fair match
980
  - **Below 40**: Poor match
981
-
982
- ### Recommendations:
983
  """
984
 
985
- # Add recommendations based on low scores and domain mismatch
986
- recommendations = []
987
-
988
- if domain_compatibility < 0.5:
989
- recommendations.append(f"- **Domain Mismatch**: Your resume appears to be focused on {resume_domain.replace('_', ' ')} while the job is in {job_domain.replace('_', ' ')}. Consider highlighting transferable skills.")
990
-
991
- if dimension_scores['relevant_skills'] < 70:
992
- recommendations.append("- **Skills**: Add more job-specific technical skills to your resume")
993
- if dimension_scores['work_experience'] < 70:
994
- recommendations.append("- **Experience**: Highlight more relevant work experience or projects")
995
- if dimension_scores['keywords_match'] < 70:
996
- recommendations.append("- **Keywords**: Include more job-specific keywords throughout your resume")
997
- if dimension_scores['tools_tech'] < 70:
998
- recommendations.append("- **Technology**: Emphasize technical tools and technologies mentioned in the job description")
999
- if dimension_scores['projects'] < 70:
1000
- recommendations.append("- **Projects**: Add more relevant projects that demonstrate required skills and use job-specific technologies")
1001
-
1002
- if not recommendations:
1003
- recommendations.append("- **Excellent!** Your resume is well-aligned with the job requirements")
1004
-
1005
- breakdown += "\n".join(recommendations)
1006
-
1007
  # Create score chart data
1008
  chart_data = pd.DataFrame({
1009
  'Dimension': [
@@ -1024,27 +810,19 @@ def score_resume(job_description, resume_file, resume_text):
1024
  'Weight (%)': [25, 20, 10, 7, 10, 10, 10, 8]
1025
  })
1026
 
1027
- return breakdown, chart_data
1028
 
1029
  except Exception as e:
1030
- return f"Error processing resume: {str(e)}", ""
1031
 
1032
- # Create Gradio interface
1033
- with gr.Blocks(title="ATS Resume Scorer", theme=gr.themes.Soft()) as demo:
1034
  gr.Markdown("""
1035
- # 🎯 ATS Resume Scorer
1036
 
1037
- This tool evaluates how well a resume matches a job description using 8 key dimensions:
1038
- - **Relevant Skills** (25%) - Match of skills to job requirements
1039
- - **Work Experience** (20%) - Years and relevance of experience
1040
- - **Education** (10%) - Degree relevance and performance
1041
- - **Certifications & Courses** (7%) - Additional qualifications
1042
- - **Projects** (10%) - Quality and relevance of projects
1043
- - **Keywords Match** (10%) - Job-specific keyword alignment
1044
- - **Tools & Technologies** (10%) - Technical proficiency
1045
- - **Soft Skills** (8%) - Leadership, teamwork, communication
1046
-
1047
- **Supported Domains:** Web Development, Mobile Development, Data Science, Cybersecurity, DevOps, Game Development, UI/UX Design, Business Analysis, Marketing, Consultancy, AI/ML Engineering
1048
 
1049
  **πŸ“„ Resume Input:** Upload PDF/DOCX file OR paste text manually
1050
  **πŸ“‹ Job Description:** Paste as text
@@ -1078,109 +856,26 @@ with gr.Blocks(title="ATS Resume Scorer", theme=gr.themes.Soft()) as demo:
1078
  max_lines=15
1079
  )
1080
 
1081
- score_btn = gr.Button("πŸ“Š Score Resume", variant="primary", size="lg")
1082
 
1083
  with gr.Row():
1084
  with gr.Column():
1085
- score_output = gr.Markdown(label="Scoring Results")
1086
-
1087
  with gr.Column():
1088
- chart_output = gr.Dataframe(
1089
- label="Dimension Scores",
1090
- headers=['Dimension', 'Score', 'Weight (%)'],
1091
- datatype=['str', 'number', 'number']
1092
- )
1093
-
1094
- # Example inputs
1095
- gr.Examples(
1096
- examples=[
1097
- [
1098
- """Frontend Developer - React.js
1099
- We are seeking a Frontend Developer with 2+ years of experience in React.js development.
1100
- Requirements:
1101
- - Bachelor's degree in Computer Science or related field
1102
- - Strong proficiency in JavaScript, HTML, CSS
1103
- - Experience with React.js, Redux, and modern frontend frameworks
1104
- - Knowledge of responsive design and cross-browser compatibility
1105
- - Experience with version control (Git)
1106
- - Understanding of RESTful APIs
1107
- - Strong problem-solving skills and attention to detail""",
1108
-
1109
- None, # No file upload in example
1110
-
1111
- """John Smith
1112
- Frontend Developer
1113
-
1114
- Education:
1115
- - Bachelor of Technology in Computer Science, ABC University (2020)
1116
-
1117
- Experience:
1118
- - Frontend Developer at Tech Solutions (2021-2024, 3 years)
1119
- - Developed responsive web applications using React.js and Redux
1120
- - Collaborated with backend developers to integrate RESTful APIs
1121
- - Implemented modern CSS frameworks and ensured cross-browser compatibility
1122
-
1123
- Skills:
1124
- - Frontend: JavaScript, HTML5, CSS3, React.js, Redux, Vue.js
1125
- - Tools: Git, Webpack, npm, VS Code
1126
- - Responsive Design, Cross-browser compatibility
1127
- - RESTful API integration
1128
 
1129
- Projects:
1130
- - E-commerce Website: Built using React.js with Redux for state management
1131
- - Portfolio Dashboard: Responsive web application with modern UI/UX"""
1132
- ],
1133
- [
1134
- """UI/UX Designer - Product Design
1135
- We are seeking a UI/UX Designer with 2+ years of experience in product design and user research.
1136
- Requirements:
1137
- - Bachelor's degree in Design, HCI, or related field
1138
- - Strong proficiency in Figma, Sketch, and Adobe Creative Suite
1139
- - Experience with user research and usability testing
1140
- - Knowledge of design systems and prototyping
1141
- - Understanding of frontend technologies (HTML, CSS, JavaScript)
1142
- - Strong visual design and interaction design skills
1143
- - Experience with A/B testing and data-driven design
1144
- - Excellent communication and collaboration skills""",
1145
-
1146
- None, # No file upload in example
1147
-
1148
- """Sarah Johnson
1149
- UI/UX Designer
1150
-
1151
- Education:
1152
- - Bachelor of Fine Arts in Graphic Design, Art Institute (2020)
1153
-
1154
- Experience:
1155
- - UI/UX Designer at Design Studio (2021-2024, 3 years)
1156
- - Created user interfaces and experiences for web and mobile applications
1157
- - Conducted user research and usability testing sessions
1158
- - Developed design systems and component libraries using Figma
1159
- - Collaborated with frontend developers on implementation
1160
-
1161
- Skills:
1162
- - Design Tools: Figma, Sketch, Adobe XD, Photoshop, Illustrator
1163
- - Prototyping: InVision, Principle, Framer
1164
- - Research: User interviews, A/B testing, Analytics
1165
- - Frontend: HTML, CSS, JavaScript basics
1166
- - Design: Visual design, Interaction design, Wireframing
1167
-
1168
- Projects:
1169
- - E-commerce Mobile App: Designed complete user experience with user research and prototyping
1170
- - SaaS Dashboard Redesign: Led design system creation and improved user engagement by 40%
1171
-
1172
- Certifications:
1173
- - Google UX Design Certificate
1174
- - Figma Advanced Certification"""
1175
- ]
1176
- ],
1177
- inputs=[job_desc_input, resume_file_input, resume_text_input]
1178
- )
1179
 
1180
  score_btn.click(
1181
  fn=score_resume,
1182
  inputs=[job_desc_input, resume_file_input, resume_text_input],
1183
- outputs=[score_output, chart_output]
1184
  )
1185
 
1186
  if __name__ == "__main__":
 
11
  import docx
12
  import io
13
  from pathlib import Path
14
+ import os
15
+ import google.generativeai as genai
16
+ from typing import Dict, Any
17
+
18
+ # Configure Gemini API
19
+ genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
20
 
21
  class ATSScorer:
22
  def __init__(self):
 
201
  ]
202
  }
203
 
204
def analyze_cv(self, cv_text: str, job_description: str) -> Dict[str, Any]:
    """Ask Gemini how well a CV matches a job description.

    Builds a screening prompt from both texts, sends it to the
    ``gemini-2.0-flash-exp`` model and parses the span from the first ``{``
    to the last ``}`` of the reply as JSON.

    Args:
        cv_text: Plain text of the candidate's resume.
        job_description: Plain text of the job posting.

    Returns:
        ``{"success": True, "result": <parsed JSON>}`` on success, otherwise
        ``{"success": False, "message": <reason>}``. Failures of the API call
        or of JSON parsing are reported through ``message`` instead of being
        raised.
    """
    try:
        prompt = f"""You are a smart and unbiased AI CV screening assistant. Your task is to evaluate how well a candidate's resume (CV) matches a job description. The job description may include one or more roles and may contain responsibilities, expectations, and skill requirements.

Carefully review both the CV and the Job Description, and provide the output as a **valid JSON object** with the following keys:
1. **reasoning** (string): Provide a concise but insightful explanation of how well the candidate matches the job requirements — mention key matching points like role alignment, experience, and relevant technologies.
2. **skills_available** (array of 6 or fewer strings): List up to 6 skills or competencies from the CV that strongly align with the job description.
3. **missing** (array of 6 or fewer strings): List up to 6 important skills, experiences, or qualifications the candidate lacks based on the job description. If nothing is missing, return a single string in the array: "You are good to go".

CV:
\"\"\"
{cv_text}
\"\"\"

Job Description:
\"\"\"
{job_description}
\"\"\"
"""

        model = genai.GenerativeModel('gemini-2.0-flash-exp')
        response = model.generate_content(prompt)

        # The model may wrap the JSON in prose or a code fence, so keep only
        # the outermost brace-delimited span.
        text = response.text
        json_start = text.find("{")
        json_end = text.rfind("}")

        # Compare the raw indices: adding 1 to rfind() before testing for -1
        # would hide the "no closing brace" case.
        if json_start == -1 or json_end < json_start:
            return {"success": False, "message": "Could not parse JSON response"}

        parsed_result = json.loads(text[json_start:json_end + 1])
        return {"success": True, "result": parsed_result}

    except json.JSONDecodeError as e:
        print(f'Error analyzing CV: {e}')
        return {"success": False, "message": "Could not parse JSON response"}
    except Exception as e:
        # Top-level boundary for the remote call: report instead of crashing the UI.
        print(f'Error analyzing CV: {e}')
        return {"success": False, "message": f"Error: {str(e)}"}
245
+
246
def format_analysis_output(self, analysis_result: Dict[str, Any]) -> str:
    """Render the result of ``analyze_cv`` as Markdown for the Gradio UI.

    Args:
        analysis_result: Dict with a truthy ``"success"`` flag and a
            ``"result"`` dict (keys ``reasoning``, ``skills_available``,
            ``missing``), or a failure dict carrying ``"message"``.

    Returns:
        A Markdown string: an error line on failure, otherwise three sections
        (reasoning, matching skills, areas for improvement).
    """
    if not analysis_result.get("success"):
        return f"❌ **Error:** {analysis_result.get('message', 'Unknown error')}"

    result = analysis_result["result"]
    # ``or []`` also covers an explicit JSON ``null`` from the model.
    skills = result.get('skills_available') or []
    missing = result.get('missing') or []

    parts = [
        "## 📊 **AI-Powered CV Analysis**\n\n",
        "### 🔍 **Analysis & Reasoning**\n",
        f"{result.get('reasoning', 'No reasoning provided')}\n\n",
        "### ✅ **Matching Skills Found**\n",
    ]

    if skills:
        parts.extend(f"• {skill}\n" for skill in skills)
    else:
        parts.append("• No matching skills identified\n")
    parts.append("\n")

    parts.append("### ⚠️ **Areas for Improvement**\n")
    if not missing:
        parts.append("• No gaps identified\n")
    elif len(missing) == 1 and missing[0] == "You are good to go":
        # Sentinel value the prompt asks the model to return when nothing is missing.
        parts.append("🎉 **Excellent! You are good to go!**\n")
    else:
        parts.extend(f"• {item}\n" for item in missing)

    return "".join(parts)
284
+
285
  def extract_text_from_pdf(self, pdf_file):
286
  """Extract text from PDF file"""
287
  try:
 
309
  if file is None:
310
  return ""
311
 
312
+ file_path = Path(file)
313
  file_extension = file_path.suffix.lower()
314
 
315
  try:
316
  if file_extension == '.pdf':
317
+ return self.extract_text_from_pdf(file)
318
  elif file_extension in ['.docx', '.doc']:
319
+ return self.extract_text_from_docx(file)
320
  else:
321
  raise Exception(f"Unsupported file format: {file_extension}. Please upload PDF or DOCX files.")
322
  except Exception as e:
 
324
 
325
def preprocess_text(self, text):
    """Normalise free text for keyword matching.

    Lower-cases the text, replaces every character that is not a word
    character, whitespace, ``-``, ``+``, ``#`` or ``.`` with a space (so
    tokens such as ``c++``, ``c#`` and ``.net`` survive), then collapses
    runs of whitespace.

    Args:
        text: Raw text; ``None`` or an empty string is accepted.

    Returns:
        The cleaned, stripped string ("" for empty input).
    """
    if not text:
        return ""
    text = text.lower()
    # Remove special characters but keep the ones that appear in skill names
    text = re.sub(r'[^\w\s\-\+\#\.]', ' ', text)
    # Collapse whitespace last: the substitution above introduces new spaces
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
 
335
def extract_skills_from_text(self, text, domain=None):
    """Return the known skills that occur in ``text``.

    A skill counts as present when its lower-cased name is a substring of
    the preprocessed text.

    Args:
        text: Job description or resume text.
        domain: Optional key of ``self.skill_categories``; skills of that
            category are listed first in the result.

    Returns:
        A list of distinct skills in a deterministic order: skills of
        ``domain`` first, then the remaining categories in dictionary order.
    """
    text = self.preprocess_text(text)

    candidates = []
    if domain and domain in self.skill_categories:
        candidates.extend(self.skill_categories[domain])
    for skills in self.skill_categories.values():
        candidates.extend(skills)

    # dict.fromkeys removes duplicates while keeping first-seen order,
    # unlike list(set(...)) whose order changes between runs.
    return list(dict.fromkeys(
        skill for skill in candidates if skill.lower() in text
    ))
354
+
355
def detect_domain(self, text):
    """Detect the primary domain/field of ``text``.

    Each domain in ``self.domain_indicators`` is scored by the indicator
    keywords found in the preprocessed text: 3 points per high-priority
    keyword, 2 per medium-priority and 1 per low-priority.

    Args:
        text: Job description or resume text.

    Returns:
        The key of the best-scoring domain (the first one on ties), or
        ``None`` when no indicator at all was found.
    """
    text = self.preprocess_text(text)
    level_weights = (
        ('high_priority', 3),
        ('medium_priority', 2),
        ('low_priority', 1),
    )

    domain_scores = {}
    for domain, priorities in self.domain_indicators.items():
        domain_scores[domain] = sum(
            weight
            for level, weight in level_weights
            for keyword in priorities.get(level, ())
            if keyword in text
        )

    if not domain_scores:
        return None

    best_domain = max(domain_scores, key=domain_scores.get)
    # An all-zero table carries no evidence; do not report the first domain.
    return best_domain if domain_scores[best_domain] > 0 else None
381
+
382
def calculate_relevant_skills_score(self, job_description, resume):
    """Score (0-100) how many of the job's skills the resume covers.

    The share of job skills found in the resume gives up to 85 points;
    each matching skill that belongs to the job's detected domain adds 3
    more, up to 15. Returns 50 when the job description names no known skill.
    """
    target_domain = self.detect_domain(job_description)

    required = self.extract_skills_from_text(job_description, target_domain)
    offered = self.extract_skills_from_text(resume, target_domain)

    # Nothing to compare against: neutral default
    if not required:
        return 50

    shared = set(required) & set(offered)
    coverage = len(shared) / len(required)

    bonus = 0
    if target_domain and target_domain in self.skill_categories:
        in_domain = self.skill_categories[target_domain]
        bonus = min(15, 3 * sum(1 for skill in shared if skill in in_domain))

    return min(100, min(85, coverage * 100) + bonus)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
410
 
411
def extract_experience_years(self, text):
    """Extract years of experience from text.

    Two sources are considered and the largest value wins:

    * explicit statements such as "5+ years of experience" or
      "over 3 years", searched in the preprocessed text;
    * date ranges such as "2018-2021", "2018 – 2021", "2015 to 2019" or
      "2020 - present", searched in the lower-cased raw text because
      ``preprocess_text`` strips typographic dashes.

    Args:
        text: Job description or resume text; ``None``/empty yields 0.

    Returns:
        The largest number of years found, or 0 when none is found.
    """
    from datetime import date  # local import keeps this block self-contained

    if not text:
        return 0

    cleaned = self.preprocess_text(text)

    # Patterns for explicitly stated experience
    patterns = [
        r'(\d+)\+?\s*years?\s*(?:of\s*)?experience',
        r'(\d+)\+?\s*years?\s*(?:of\s*)?(?:work\s*)?experience',
        r'experience\s*(?:of\s*)?(\d+)\+?\s*years?',
        r'(\d+)\+?\s*years?\s*(?:in|of|with)',
        r'over\s*(\d+)\s*years?',
        r'more\s*than\s*(\d+)\s*years?'
    ]

    years = []
    for pattern in patterns:
        years.extend(int(match) for match in re.findall(pattern, cleaned))

    # Date ranges: hyphen, en dash, em dash or "to"; the end may be an
    # open-ended word instead of a year.
    range_pattern = (
        r'(?<!\d)(\d{4})\s*(?:[-\u2013\u2014]|to)\s*'
        r'(\d{4}|present|current|now)\b'
    )
    current_year = date.today().year
    for start, end in re.findall(range_pattern, text.lower()):
        start_year = int(start)
        end_year = int(end) if end.isdigit() else current_year
        # Future end dates (e.g. expected graduation) count only up to today
        end_year = min(end_year, current_year)
        # Ignore 4-digit groups that are not plausible years (phone numbers, ids)
        if start_year >= 1900 and end_year > start_year:
            years.append(end_year - start_year)

    return max(years) if years else 0
447
 
448
def calculate_work_experience_score(self, job_description, resume):
    """Score (0-100) the resume's experience against the job's requirement.

    Tiers, based on years extracted from both texts:

    * job states no requirement: 40 + 8 per experience keyword, max 80;
    * resume meets the requirement: 80 + 2 per extra year, max 100;
    * resume reaches 70% of it: 70;
    * resume reaches 50% of it: 60;
    * otherwise: 30 + 5 per experience keyword, capped at 55 so that it can
      never outrank the 50% tier.
    """
    job_experience = self.extract_experience_years(job_description)
    resume_experience = self.extract_experience_years(resume)

    # Count which experience-related keywords the resume mentions at all
    experience_keywords = ['experience', 'worked', 'employed', 'position', 'role', 'job', 'internship', 'intern']
    resume_lower = resume.lower()
    experience_mentions = sum(1 for keyword in experience_keywords if keyword in resume_lower)

    if job_experience == 0:
        # No specific requirement: fall back to keyword mentions
        return min(80, 40 + experience_mentions * 8)

    if resume_experience >= job_experience:
        return min(100, 80 + (resume_experience - job_experience) * 2)
    if resume_experience >= job_experience * 0.7:
        return 70
    if resume_experience >= job_experience * 0.5:
        return 60

    # Below half of the requirement: keyword credit must stay under tier 60
    return min(55, 30 + experience_mentions * 5)
 
472
 
473
def calculate_education_score(self, job_description, resume):
    """Score (40-100) the education evidence found in the resume.

    Points are added for: any known degree type (20), each education
    keyword (10 each, max 30), any undergraduate pattern (15), any year
    indicator (10) and the job's detected domain name appearing in the
    resume (20). The total is clamped to the range 40-100.
    """
    resume_lower = resume.lower()
    patterns = self.education_patterns

    def mentions_any(terms):
        return any(term in resume_lower for term in terms)

    degree_score = 20 if mentions_any(patterns['degree_types']) else 0

    education_mentions = sum(1 for keyword in self.education_keywords if keyword in resume_lower)
    education_score = min(30, education_mentions * 10)

    undergraduate_score = 15 if mentions_any(patterns['undergraduate']) else 0
    year_score = 10 if mentions_any(patterns['year_indicators']) else 0

    # Bonus when the resume names the job's field, e.g. "data science"
    field_bonus = 0
    domain = self.detect_domain(job_description)
    if domain and mentions_any((domain.replace('_', ' '), domain.replace('_', ''))):
        field_bonus = 20

    total_score = degree_score + education_score + undergraduate_score + year_score + field_bonus
    return min(100, max(40, total_score))
515
+
516
def calculate_certifications_score(self, job_description, resume):
    """Score (40-100) the certifications mentioned in the resume.

    Certification keywords (15 each) and phrase patterns such as
    "certified X" (10 each) give up to 60 points; well-known certificates
    for a cybersecurity or cloud job add 15 each. Without any keyword or
    pattern hit the score is the floor value 40.
    """
    text = resume.lower()

    keyword_hits = 0
    for keyword in self.certification_keywords:
        if keyword in text:
            keyword_hits += 1

    phrase_patterns = (
        r'certified\s+\w+',
        r'\w+\s+certification',
        r'\w+\s+certificate',
        r'licensed\s+\w+',
        r'accredited\s+\w+',
    )
    pattern_hits = sum(1 for pattern in phrase_patterns if re.search(pattern, text))

    # Certificates that earn a bonus, keyed by the job's detected domain
    certs_by_domain = {
        'cybersecurity': ('cissp', 'ceh', 'oscp', 'comptia', 'security+'),
        'cloud': ('aws', 'azure', 'gcp', 'cloud practitioner'),
    }
    domain = self.detect_domain(job_description)
    bonus = 15 * sum(1 for cert in certs_by_domain.get(domain, ()) if cert in text)

    if keyword_hits == 0 and pattern_hits == 0:
        return 40

    base = min(60, keyword_hits * 15 + pattern_hits * 10)
    return max(40, min(100, base + bonus))
555
+
556
def categorize_projects(self, project_text):
    """List the project categories whose keywords occur in ``project_text``.

    Each category of ``self.project_categories`` appears at most once, in
    dictionary order, when at least one of its keywords is a substring of
    the preprocessed text.
    """
    cleaned = self.preprocess_text(project_text)
    return [
        category
        for category, keywords in self.project_categories.items()
        if any(keyword in cleaned for keyword in keywords)
    ]
568
+
569
def calculate_projects_score(self, job_description, resume):
    """Score (30-100) the projects described in the resume.

    Sums: 8 points per project keyword (max 50), 20 for a recognisable
    projects section, 3 per detected project category (max 15) and 25 when
    one of those categories equals the job's detected domain.
    """
    text = resume.lower()

    keyword_hits = 0
    for keyword in self.project_keywords:
        if keyword in text:
            keyword_hits += 1

    # Headings that indicate a dedicated projects section
    section_points = 0
    for marker in ('projects', 'personal projects', 'academic projects', 'work projects'):
        if marker in text:
            section_points = 20
            break

    found_categories = self.categorize_projects(resume)
    target_domain = self.detect_domain(job_description)

    relevance_points = 25 if target_domain and target_domain in found_categories else 0
    keyword_points = min(50, keyword_hits * 8)
    category_points = min(15, len(found_categories) * 3)

    total = keyword_points + section_points + category_points + relevance_points
    return min(100, max(30, total))
596
+
597
def calculate_keywords_match_score(self, job_description, resume):
    """Score (30-100) the keyword alignment of resume and job description.

    Primary path: cosine similarity of sentence embeddings (as a
    percentage) plus up to 20 points for the share of non-trivial job words
    found in the resume. If anything in that path fails, the error is
    printed and a plain word-overlap percentage is used instead (50 when
    the job description has no words).
    """
    try:
        cleaned_job = self.preprocess_text(job_description)
        cleaned_resume = self.preprocess_text(resume)

        # Semantic similarity of the two whole texts, as a percentage
        job_vector = self.sentence_model.encode([cleaned_job])
        resume_vector = self.sentence_model.encode([cleaned_resume])
        semantic_pct = cosine_similarity(job_vector, resume_vector)[0][0] * 100

        # Words too common to count as keywords
        stop_words = {'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'can', 'must', 'shall', 'a', 'an', 'this', 'that', 'these', 'those'}
        job_terms = set(cleaned_job.split()) - stop_words
        resume_terms = set(cleaned_resume.split()) - stop_words

        bonus = 0
        if job_terms:
            bonus = len(job_terms & resume_terms) / len(job_terms) * 20

        return max(30, min(100, semantic_pct + bonus))

    except Exception as e:
        print(f"Error in keyword matching: {e}")
        # Fallback to simple word matching
        job_tokens = set(job_description.lower().split())
        resume_tokens = set(resume.lower().split())

        if not job_tokens:
            return 50
        shared_ratio = len(job_tokens & resume_tokens) / len(job_tokens)
        return min(100, max(30, shared_ratio * 100))
643
+
644
def calculate_tools_tech_score(self, job_description, resume):
    """Score (40-100) how well the resume covers the job's technical tools.

    Only skills that belong to one of the technical skill categories are
    considered. The share of required tools found in the resume gives up to
    85 points; having more technical skills than required adds 2 points per
    extra skill, up to 15. Returns 60 when the job names no technical skill.
    """
    job_tools = self.extract_skills_from_text(job_description)
    resume_tools = self.extract_skills_from_text(resume)

    technical_categories = ['programming', 'databases', 'cloud', 'web_development', 'mobile_development', 'data_science', 'cybersecurity', 'ai_ml_engineering']

    technical_skills = set()
    for category in technical_categories:
        technical_skills.update(self.skill_categories.get(category, ()))

    # Sets, not lists: a skill listed in several categories counts once
    job_tech_skills = {skill for skill in job_tools if skill in technical_skills}
    resume_tech_skills = {skill for skill in resume_tools if skill in technical_skills}

    if not job_tech_skills:
        return 60  # Default score if no technical skills in job description

    matching_tools = job_tech_skills & resume_tech_skills
    tool_match_ratio = len(matching_tools) / len(job_tech_skills)

    # Bonus for having more tools than required
    extra_tools_bonus = min(15, max(0, len(resume_tech_skills) - len(job_tech_skills)) * 2)

    final_score = min(100, tool_match_ratio * 85 + extra_tools_bonus)
    return max(40, final_score)
676
+
677
def infer_soft_skills(self, text):
    """Infer soft skills from the interests and activities named in ``text``.

    A skill of ``self.interest_skill_mapping`` is inferred (once) when any
    of its indicator phrases is a substring of the preprocessed text.
    """
    cleaned = self.preprocess_text(text)
    return [
        skill
        for skill, indicators in self.interest_skill_mapping.items()
        if any(indicator in cleaned for indicator in indicators)
    ]
689
+
690
def calculate_soft_skills_score(self, job_description, resume):
    """Score (50-100) the soft skills shown by the resume.

    Resume soft skills are those named directly plus those inferred from
    activities and interests. When the job names no soft skill the score is
    50 + 5 per resume soft skill (max 80). Otherwise the share of required
    soft skills covered gives up to 70 points and each resume soft skill
    adds 3 more (max 20).
    """
    known_soft_skills = self.skill_categories['soft_skills']
    job_lower = job_description.lower()
    resume_lower = resume.lower()

    job_soft_skills = [skill for skill in known_soft_skills if skill in job_lower]
    resume_soft_skills = [skill for skill in known_soft_skills if skill in resume_lower]

    # Combine direct mentions with skills inferred from activities/interests
    all_resume_soft_skills = set(resume_soft_skills) | set(self.infer_soft_skills(resume))

    if not job_soft_skills:
        # No specific soft skills requested: give credit for having any
        return min(80, 50 + len(all_resume_soft_skills) * 5)

    matching_soft_skills = set(job_soft_skills) & all_resume_soft_skills
    soft_skill_ratio = len(matching_soft_skills) / len(job_soft_skills)

    # Bonus for having diverse soft skills
    diversity_bonus = min(20, len(all_resume_soft_skills) * 3)

    final_score = min(100, soft_skill_ratio * 70 + diversity_bonus)
    return max(50, final_score)
 
721
 
722
def calculate_final_score(self, job_description, resume):
    """Compute every dimension score and their weighted sum.

    Returns:
        ``(final_score, scores)`` where ``scores`` maps each dimension name
        to its individual score and ``final_score`` is the sum of each score
        multiplied by ``self.weights[dimension]``.
    """
    # Dimension name -> method that scores it, in reporting order
    dimension_methods = (
        ('relevant_skills', self.calculate_relevant_skills_score),
        ('work_experience', self.calculate_work_experience_score),
        ('education', self.calculate_education_score),
        ('certifications', self.calculate_certifications_score),
        ('projects', self.calculate_projects_score),
        ('keywords_match', self.calculate_keywords_match_score),
        ('tools_tech', self.calculate_tools_tech_score),
        ('soft_skills', self.calculate_soft_skills_score),
    )

    scores = {}
    for dimension, score_fn in dimension_methods:
        scores[dimension] = score_fn(job_description, resume)

    final_score = sum(scores[dim] * self.weights[dim] for dim in scores)
    return final_score, scores
740
 
741
  # Initialize the scorer
742
  scorer = ATSScorer()
743
 
744
  def score_resume(job_description, resume_file, resume_text):
745
+ """Enhanced function to score resume and provide AI analysis"""
746
  if not job_description.strip():
747
+ return "Please provide a job description.", "", ""
748
 
749
  # Determine resume source
750
  resume_content = ""
 
752
  try:
753
  resume_content = scorer.extract_text_from_file(resume_file)
754
  if not resume_content.strip():
755
+ return "Could not extract text from the uploaded file. Please check the file format.", "", ""
756
  except Exception as e:
757
+ return f"Error processing file: {str(e)}", "", ""
758
  elif resume_text.strip():
759
  resume_content = resume_text.strip()
760
  else:
761
+ return "Please provide either a resume file (PDF/DOCX) or paste resume text.", "", ""
762
 
763
  try:
764
+ # Get ATS score
765
  final_score, dimension_scores = scorer.calculate_final_score(job_description, resume_content)
766
 
767
+ # Get AI analysis
768
+ analysis_result = scorer.analyze_cv(resume_content, job_description)
769
+ ai_analysis = scorer.format_analysis_output(analysis_result)
 
770
 
771
+ # Create ATS breakdown
772
+ ats_breakdown = f"""
773
  ## Overall ATS Score: {final_score:.1f}/100
774
 
 
 
 
 
 
775
  ### Dimension Breakdown:
776
  - **Relevant Skills** (25%): {dimension_scores['relevant_skills']:.1f}/100
777
  - **Work Experience** (20%): {dimension_scores['work_experience']:.1f}/100
 
788
  - **56-75**: Good match
789
  - **45-55**: Fair match
790
  - **Below 40**: Poor match
 
 
791
  """
792
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
793
  # Create score chart data
794
  chart_data = pd.DataFrame({
795
  'Dimension': [
 
810
  'Weight (%)': [25, 20, 10, 7, 10, 10, 10, 8]
811
  })
812
 
813
+ return ats_breakdown, ai_analysis, chart_data
814
 
815
  except Exception as e:
816
+ return f"Error processing resume: {str(e)}", "", ""
817
 
818
+ # Create Enhanced Gradio interface
819
+ with gr.Blocks(title="Enhanced ATS Resume Scorer", theme=gr.themes.Soft()) as demo:
820
  gr.Markdown("""
821
+ # 🎯 Enhanced ATS Resume Scorer with AI Analysis
822
 
823
+ This tool provides **dual analysis** of your resume:
824
+ 1. **ATS Score** - Technical matching across 8 dimensions
825
+ 2. **AI Analysis** - Intelligent insights and recommendations
 
 
 
 
 
 
 
 
826
 
827
  **πŸ“„ Resume Input:** Upload PDF/DOCX file OR paste text manually
828
  **πŸ“‹ Job Description:** Paste as text
 
856
  max_lines=15
857
  )
858
 
859
+ score_btn = gr.Button("πŸš€ Analyze Resume", variant="primary", size="lg")
860
 
861
  with gr.Row():
862
  with gr.Column():
863
+ ats_output = gr.Markdown(label="ATS Scoring Results")
864
+
865
  with gr.Column():
866
+ ai_output = gr.Markdown(label="AI Analysis Results")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
867
 
868
+ with gr.Row():
869
+ chart_output = gr.Dataframe(
870
+ label="Dimension Scores",
871
+ headers=['Dimension', 'Score', 'Weight (%)'],
872
+ datatype=['str', 'number', 'number']
873
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
874
 
875
  score_btn.click(
876
  fn=score_resume,
877
  inputs=[job_desc_input, resume_file_input, resume_text_input],
878
+ outputs=[ats_output, ai_output, chart_output]
879
  )
880
 
881
  if __name__ == "__main__":