Spaces:

KavyaBansal
/

CV_Scorer

Running

App Files Files Community

KavyaBansal committed about 18 hours ago

Commit

914198f

verified ·

1 Parent(s): 69220e9

Update app.py

Browse files

Files changed (1) hide show

app.py +495 -800

app.py CHANGED Viewed

@@ -11,6 +11,12 @@ import PyPDF2
 import docx
 import io
 from pathlib import Path
 class ATSScorer:
     def __init__(self):
@@ -195,6 +201,87 @@ class ATSScorer:
             ]
         }
     def extract_text_from_pdf(self, pdf_file):
         """Extract text from PDF file"""
         try:
@@ -222,14 +309,14 @@ class ATSScorer:
         if file is None:
             return ""
-        file_path = Path(file.name)
         file_extension = file_path.suffix.lower()
         try:
             if file_extension == '.pdf':
-                return self.extract_text_from_pdf(file.name)
             elif file_extension in ['.docx', '.doc']:
-                return self.extract_text_from_docx(file.name)
             else:
                 raise Exception(f"Unsupported file format: {file_extension}. Please upload PDF or DOCX files.")
         except Exception as e:
@@ -237,699 +324,427 @@ class ATSScorer:
     def preprocess_text(self, text):
         """Clean and preprocess text"""
-        if not text:
-            return ""
-        text = text.lower().strip()
         # Remove extra whitespace
         text = re.sub(r'\s+', ' ', text)
-        return text
-    def detect_job_domain(self, job_desc):
-        """Detect the primary domain of the job with improved priority-based scoring"""
-        job_lower = job_desc.lower()
-        domain_scores = {}
-        for domain, indicators in self.domain_indicators.items():
-            score = 0
-            # High priority indicators (job titles, specific roles) - weight 10
-            for indicator in indicators['high_priority']:
-                if indicator in job_lower:
-                    score += 10
-            # Medium priority indicators (domain-specific terms) - weight 3
-            for indicator in indicators['medium_priority']:
-                if indicator in job_lower:
-                    score += 3
-            # Low priority indicators (tools, technologies) - weight 1
-            for indicator in indicators['low_priority']:
-                if indicator in job_lower:
-                    score += 1
-            domain_scores[domain] = score
-        # Return the domain with highest score, or 'general' if no matches
-        if max(domain_scores.values()) > 0:
-            return max(domain_scores, key=domain_scores.get)
-        else:
-            return 'general'
-    def detect_resume_domain(self, resume):
-        """Detect the primary domain of the resume with improved priority-based scoring"""
-        resume_lower = resume.lower()
         domain_scores = {}
-        for domain, indicators in self.domain_indicators.items():
             score = 0
-            # High priority indicators (job titles, specific roles) - weight 10
-            for indicator in indicators['high_priority']:
-                if indicator in resume_lower:
-                    score += 10
-            # Medium priority indicators (domain-specific terms) - weight 3
-            for indicator in indicators['medium_priority']:
-                if indicator in resume_lower:
                     score += 3
-            # Low priority indicators (tools, technologies) - weight 1
-            for indicator in indicators['low_priority']:
-                if indicator in resume_lower:
                     score += 1
             domain_scores[domain] = score
-        # Return the domain with highest score, or 'general' if no matches
-        if max(domain_scores.values()) > 0:
             return max(domain_scores, key=domain_scores.get)
-        else:
-            return 'general'
-    def calculate_domain_compatibility(self, job_domain, resume_domain):
-        """Calculate compatibility between job and resume domains"""
-        if job_domain == resume_domain:
-            return 1.0
-        # More generous domain compatibility matrix
-        compatibility_matrix = {
-            ('cybersecurity', 'web_development'): 0.7,
-            ('cybersecurity', 'mobile_development'): 0.6,
-            ('cybersecurity', 'data_science'): 0.8,
-            ('cybersecurity', 'ai_ml_engineering'): 0.8,
-            ('web_development', 'mobile_development'): 0.9,
-            ('web_development', 'data_science'): 0.8,
-            ('web_development', 'ui_ux_design'): 0.9,
-            ('mobile_development', 'data_science'): 0.7,
-            ('mobile_development', 'ui_ux_design'): 0.8,
-            ('devops', 'web_development'): 0.8,
-            ('devops', 'cybersecurity'): 0.7,
-            ('devops', 'ai_ml_engineering'): 0.8,
-            ('game_development', 'web_development'): 0.7,
-            ('game_development', 'mobile_development'): 0.8,
-            ('ui_ux_design', 'web_development'): 0.9,
-            ('ui_ux_design', 'mobile_development'): 0.8,
-            ('ui_ux_design', 'marketing'): 0.7,
-            ('business_analysis', 'consultancy'): 0.9,
-            ('business_analysis', 'marketing'): 0.7,
-            ('business_analysis', 'data_science'): 0.7,
-            ('marketing', 'consultancy'): 0.8,
-            ('marketing', 'business_analysis'): 0.7,
-            ('marketing', 'ui_ux_design'): 0.7,
-            ('consultancy', 'business_analysis'): 0.9,
-            ('consultancy', 'marketing'): 0.8,
-            ('ai_ml_engineering', 'data_science'): 0.95,
-            ('ai_ml_engineering', 'web_development'): 0.8,
-            ('ai_ml_engineering', 'cybersecurity'): 0.8,
-            ('data_science', 'ai_ml_engineering'): 0.95,
-        }
-        # Check both directions
-        compatibility = compatibility_matrix.get((job_domain, resume_domain),
-                                                compatibility_matrix.get((resume_domain, job_domain), 0.5))
-        return compatibility
-    def extract_years_of_experience(self, text):
         """Extract years of experience from text"""
-        text = text.lower()
         patterns = [
-            r'(\d+)\+?\s*years?\s+(?:of\s+)?experience',
-            r'(\d+)\+?\s*yrs?\s+(?:of\s+)?experience',
-            r'experience.*?(\d+)\+?\s*years?',
-            r'(\d+)\+?\s*years?\s+in\s+'
         ]
         years = []
         for pattern in patterns:
             matches = re.findall(pattern, text)
             years.extend([int(match) for match in matches])
         return max(years) if years else 0
-    def extract_contextual_keywords(self, text, job_domain="general"):
-        """Extract keywords with domain context awareness"""
-        text = self.preprocess_text(text)
-        keywords = set()
-        # Get relevant categories based on domain
-        relevant_categories = []
-        if job_domain == 'cybersecurity':
-            relevant_categories = ['cybersecurity', 'programming']
-        elif job_domain == 'web_development':
-            relevant_categories = ['web_development', 'programming', 'databases']
-        elif job_domain == 'mobile_development':
-            relevant_categories = ['mobile_development', 'programming']
-        elif job_domain == 'data_science':
-            relevant_categories = ['data_science', 'programming', 'databases']
-        elif job_domain == 'ui_ux_design':
-            relevant_categories = ['ui_ux_design', 'web_development']
-        elif job_domain == 'business_analysis':
-            relevant_categories = ['business_analysis', 'databases']
-        elif job_domain == 'marketing':
-            relevant_categories = ['marketing', 'ui_ux_design']
-        elif job_domain == 'consultancy':
-            relevant_categories = ['consultancy', 'business_analysis']
-        elif job_domain == 'ai_ml_engineering':
-            relevant_categories = ['ai_ml_engineering', 'data_science', 'programming']
-        else:
-            relevant_categories = ['programming', 'databases', 'cloud', 'web_development']
-        # Extract keywords from relevant categories
-        for category in relevant_categories:
-            if category in self.skill_categories:
-                for skill in self.skill_categories[category]:
-                    if skill in text:
-                        keywords.add(skill)
-        # Use spaCy for entity extraction if available
-        if self.nlp:
-            doc = self.nlp(text)
-            for ent in doc.ents:
-                if ent.label_ in ['ORG', 'PRODUCT', 'LANGUAGE']:
-                    keywords.add(ent.text.lower())
-        return list(keywords)
-    def calculate_semantic_similarity(self, text1, text2):
-        """Calculate semantic similarity between two texts with lower threshold"""
-        if not text1 or not text2:
-            return 0.0
-        embeddings = self.sentence_model.encode([text1, text2])
-        similarity = cosine_similarity([embeddings[0]], [embeddings[1]])[0][0]
-        # Lower threshold for more inclusive matching
-        if similarity < 0.15:
-            return 0.0
-        return max(0, similarity)
-    def score_relevant_skills(self, job_desc, resume):
-        """Score skill relevance with more generous scoring"""
-        job_domain = self.detect_job_domain(job_desc)
-        resume_domain = self.detect_resume_domain(resume)
-        job_keywords = set(self.extract_contextual_keywords(job_desc, job_domain))
-        resume_keywords = set(self.extract_contextual_keywords(resume, job_domain))
-        if not job_keywords:
-            # More generous fallback using semantic similarity
-            semantic_score = self.calculate_semantic_similarity(job_desc, resume) * 120
-            return min(80, semantic_score)
-        # Exact keyword matching
-        exact_matches = len(job_keywords.intersection(resume_keywords))
-        exact_score = exact_matches / len(job_keywords)
-        # Semantic similarity with higher weight
-        semantic_score = self.calculate_semantic_similarity(job_desc, resume)
-        # More generous base scoring
-        base_score = (exact_score * 0.6 + semantic_score * 0.4) * 120
-        # Apply domain compatibility with minimal penalty
-        domain_compatibility = self.calculate_domain_compatibility(job_domain, resume_domain)
-        final_score = base_score * (0.7 + 0.3 * domain_compatibility)  # Minimum 70% of base score
-        return min(100, final_score)
-    def score_work_experience(self, job_desc, resume):
-        """Score work experience with more generous scoring"""
-        resume_years = self.extract_years_of_experience(resume)
-        job_years = self.extract_years_of_experience(job_desc)
-        job_domain = self.detect_job_domain(job_desc)
-        resume_domain = self.detect_resume_domain(resume)
-        # Years of experience score
-        if job_years > 0:
-            years_score = min(100, (resume_years / job_years) * 120)
-        else:
-            years_score = 60 if resume_years > 0 else 20
-        # Domain-aware semantic similarity
-        semantic_score = self.calculate_semantic_similarity(job_desc, resume) * 120
-        # Apply domain compatibility
-        domain_compatibility = self.calculate_domain_compatibility(job_domain, resume_domain)
-        # Combine scores with more generous weighting
-        base_score = (years_score * 0.4 + semantic_score * 0.6)
-        final_score = base_score * (0.7 + 0.3 * domain_compatibility)
-        return min(100, final_score)
-    def score_education(self, job_desc, resume):
-        """Score education relevance - Enhanced for undergraduates"""
         resume_lower = resume.lower()
-        job_lower = job_desc.lower()
-        # Extract required degree from job description
-        required_degrees = []
-        for degree_type in self.education_patterns['degree_types']:
-            if degree_type in job_lower:
-                required_degrees.append(degree_type)
-        # Check if candidate is undergraduate
-        is_undergraduate = any(pattern in resume_lower for pattern in self.education_patterns['undergraduate'])
-        # Determine candidate's year if undergraduate
-        year_score_multiplier = 1.0
-        if is_undergraduate:
-            if any(year in resume_lower for year in ['final year', 'fourth year', 'senior']):
-                year_score_multiplier = 0.95
-            elif any(year in resume_lower for year in ['third year', 'junior']):
-                year_score_multiplier = 0.85
-            elif any(year in resume_lower for year in ['second year', 'sophomore']):
-                year_score_multiplier = 0.70
-            elif any(year in resume_lower for year in ['first year', 'freshman']):
-                year_score_multiplier = 0.55
-        # Check degree match with more generous scoring
-        degree_match_score = 0
-        if required_degrees:
-            candidate_degrees = []
-            for degree_type in self.education_patterns['degree_types']:
-                if degree_type in resume_lower:
-                    candidate_degrees.append(degree_type)
-            if candidate_degrees:
-                if any(req_deg in candidate_degrees for req_deg in required_degrees):
-                    degree_match_score = 85
-                elif any(deg in ['btech', 'be', 'bs', 'bachelor'] for deg in candidate_degrees) and \
-                     any(deg in ['bachelor', 'btech', 'be', 'bs'] for deg in required_degrees):
-                    degree_match_score = 80
-                elif any(deg in ['master', 'ms', 'ma', 'mtech', 'mba'] for deg in candidate_degrees) and \
-                     any(deg in ['bachelor', 'btech', 'be', 'bs'] for deg in required_degrees):
-                    degree_match_score = 90
-                else:
-                    degree_match_score = 50
-            else:
-                degree_match_score = 20
         else:
-            education_present = any(keyword in resume_lower for keyword in self.education_keywords)
-            degree_match_score = 60 if education_present else 20
-        # Apply undergraduate multiplier
-        if is_undergraduate and degree_match_score > 0:
-            degree_match_score *= year_score_multiplier
-        # Higher semantic similarity bonus
-        semantic_bonus = self.calculate_semantic_similarity(job_desc, resume) * 20
-        final_score = min(100, degree_match_score + semantic_bonus)
-        return final_score
-    def score_certifications(self, job_desc, resume):
-        """Score certifications and courses (7% weight)"""
         resume_lower = resume.lower()
-        job_lower = job_desc.lower()
         # Check for certification keywords
-        cert_count = sum(1 for keyword in self.certification_keywords if keyword in resume_lower)
-        # Return 0 if no certifications found
-        if cert_count == 0:
-            return 0
-        # Check for domain-specific certifications
-        job_domain = self.detect_job_domain(job_desc)
         domain_cert_bonus = 0
-        if job_domain == 'cybersecurity':
-            cyber_certs = ['oscp', 'cissp', 'ceh', 'giac', 'sans', 'security+']
-            domain_cert_bonus = sum(20 for cert in cyber_certs if cert in resume_lower)
-        elif job_domain == 'web_development':
-            web_certs = ['aws certified', 'google cloud', 'azure certified', 'mongodb certified']
-            domain_cert_bonus = sum(15 for cert in web_certs if cert in resume_lower)
-        elif job_domain == 'data_science':
-            data_certs = ['tensorflow developer', 'aws machine learning', 'google data engineer', 'microsoft azure ai']
-            domain_cert_bonus = sum(15 for cert in data_certs if cert in resume_lower)
-        elif job_domain == 'ui_ux_design':
-            design_certs = ['adobe certified', 'figma certified', 'ux certification', 'design thinking', 'google ux']
-            domain_cert_bonus = sum(15 for cert in design_certs if cert in resume_lower)
-        elif job_domain == 'business_analysis':
-            ba_certs = ['cbap', 'ccba', 'pmp', 'agile certified', 'scrum master', 'business analysis']
-            domain_cert_bonus = sum(15 for cert in ba_certs if cert in resume_lower)
-        elif job_domain == 'marketing':
-            marketing_certs = ['google ads', 'facebook blueprint', 'hubspot', 'google analytics', 'digital marketing']
-            domain_cert_bonus = sum(15 for cert in marketing_certs if cert in resume_lower)
-        elif job_domain == 'consultancy':
-            consulting_certs = ['pmp', 'prince2', 'change management', 'lean six sigma', 'agile certified']
-            domain_cert_bonus = sum(15 for cert in consulting_certs if cert in resume_lower)
-        elif job_domain == 'ai_ml_engineering':
-            ai_certs = ['tensorflow developer', 'aws machine learning', 'google cloud ml', 'nvidia deep learning', 'microsoft ai']
-            domain_cert_bonus = sum(15 for cert in ai_certs if cert in resume_lower)
-        # More generous base score for having certifications
-        base_score = min(60, cert_count * 25)
-        # Relevance to job description
-        relevance_score = self.calculate_semantic_similarity(job_desc, resume) * 30
-        return min(100, base_score + relevance_score + domain_cert_bonus)
-    def classify_project_category(self, project_text):
-        """Classify project into categories based on description"""
-        project_lower = project_text.lower()
-        category_scores = {}
         for category, keywords in self.project_categories.items():
-            score = sum(1 for keyword in keywords if keyword in project_lower)
-            if score > 0:
-                category_scores[category] = score
-        if not category_scores:
-            return 'general'
-        return max(category_scores, key=category_scores.get)
-    def extract_project_keywords(self, project_text, job_domain):
-        """Extract technical keywords from project description"""
-        project_lower = project_text.lower()
-        keywords = set()
-        # Get relevant categories based on job domain
-        relevant_categories = []
-        if job_domain == 'cybersecurity':
-            relevant_categories = ['cybersecurity', 'programming']
-        elif job_domain == 'web_development':
-            relevant_categories = ['web_development', 'programming', 'databases']
-        elif job_domain == 'mobile_development':
-            relevant_categories = ['mobile_development', 'programming']
-        elif job_domain == 'data_science':
-            relevant_categories = ['data_science', 'programming', 'databases']
-        elif job_domain == 'ui_ux_design':
-            relevant_categories = ['ui_ux_design', 'web_development']
-        elif job_domain == 'business_analysis':
-            relevant_categories = ['business_analysis', 'databases']
-        elif job_domain == 'marketing':
-            relevant_categories = ['marketing', 'ui_ux_design']
-        elif job_domain == 'consultancy':
-            relevant_categories = ['consultancy', 'business_analysis']
-        elif job_domain == 'ai_ml_engineering':
-            relevant_categories = ['ai_ml_engineering', 'data_science', 'programming']
-        else:
-            relevant_categories = ['programming', 'databases', 'cloud']
-        # Extract keywords from relevant categories
-        for category in relevant_categories:
-            if category in self.skill_categories:
-                for skill in self.skill_categories[category]:
-                    if skill in project_lower:
-                        keywords.add(skill)
-        return keywords
-    def score_projects(self, job_desc, resume):
-        """Score projects with stricter keyword and category matching"""
         resume_lower = resume.lower()
-        job_lower = job_desc.lower()
-        job_domain = self.detect_job_domain(job_desc)
-        # Extract job keywords for matching
-        job_keywords = set(self.extract_contextual_keywords(job_desc, job_domain))
-        # Find project sections
-        project_sections = []
-        lines = resume.split('\n')
-        in_project_section = False
-        current_project = ""
-        for line in lines:
-            line_lower = line.lower().strip()
-            if any(keyword in line_lower for keyword in self.project_keywords):
-                if current_project:
-                    project_sections.append(current_project)
-                current_project = line
-                in_project_section = True
-            elif in_project_section:
-                if line.strip() and not line.startswith('-') and not any(section_word in line_lower for section_word in ['experience', 'education', 'skills', 'certification']):
-                    current_project += " " + line
-                elif line.strip().startswith('-') or not line.strip():
-                    current_project += " " + line
-                else:
-                    if current_project:
-                        project_sections.append(current_project)
-                        current_project = ""
-                    in_project_section = False
-        if current_project:
-            project_sections.append(current_project)
-        # If no projects found, return very low score
-        if not project_sections:
-            project_count = sum(1 for keyword in self.project_keywords if keyword in resume_lower)
-            return 5 if project_count > 0 else 0
-        # Analyze each project
-        total_project_score = 0
-        project_scores = []
-        for project in project_sections:
-            project_score = 0
-            # Step 1: Direct keyword matching (highest priority)
-            project_keywords = self.extract_project_keywords(project, job_domain)
-            if job_keywords:
-                keyword_matches = len(job_keywords.intersection(project_keywords))
-                keyword_match_ratio = keyword_matches / len(job_keywords)
-                if keyword_match_ratio >= 0.5:  # 50% or more keywords match
-                    project_score = 80 + (keyword_match_ratio - 0.5) * 40  # 80-100 points
-                elif keyword_match_ratio >= 0.3:  # 30-49% keywords match
-                    project_score = 60 + (keyword_match_ratio - 0.3) * 100  # 60-80 points
-                elif keyword_match_ratio >= 0.1:  # 10-29% keywords match
-                    project_score = 30 + (keyword_match_ratio - 0.1) * 150  # 30-60 points
-                elif keyword_matches > 0:  # Some keywords match but less than 10%
-                    project_score = 20
-                else:
-                    # Step 2: Category matching (if no keyword matches)
-                    project_category = self.classify_project_category(project)
-                    # Map project categories to job domains
-                    category_domain_mapping = {
-                        'web_development': 'web_development',
-                        'mobile_development': 'mobile_development',
-                        'data_science': 'data_science',
-                        'cybersecurity': 'cybersecurity',
-                        'game_development': 'game_development',
-                        'devops': 'devops',
-                        'api_backend': 'web_development',
-                        'desktop_application': 'general',
-                        'ui_ux_design': 'ui_ux_design',
-                        'business_analysis': 'business_analysis',
-                        'marketing': 'marketing',
-                        'ai_ml_engineering': 'ai_ml_engineering'
-                    }
-                    project_domain = category_domain_mapping.get(project_category, 'general')
-                    if project_domain == job_domain:
-                        project_score = 40  # Same domain but no keyword matches
-                    elif project_domain != 'general' and job_domain != 'general':
-                        # Check domain compatibility
-                        compatibility = self.calculate_domain_compatibility(job_domain, project_domain)
-                        project_score = 20 * compatibility  # 0-20 points based on compatibility
-                    else:
-                        project_score = 10  # Very low score for unrelated projects
-            else:
-                # If no job keywords found, use semantic similarity as fallback
-                semantic_score = self.calculate_semantic_similarity(job_desc, project)
-                project_score = semantic_score * 50  # Max 50 points from semantic similarity
-            project_scores.append(project_score)
-        # Calculate final score based on best projects
-        if project_scores:
-            # Take average of all projects but give more weight to best projects
-            project_scores.sort(reverse=True)
-            if len(project_scores) == 1:
-                total_project_score = project_scores[0]
-            elif len(project_scores) == 2:
-                total_project_score = (project_scores[0] * 0.7 + project_scores[1] * 0.3)
             else:
-                # For 3+ projects, weight the top 3
-                total_project_score = (project_scores[0] * 0.5 +
-                                     project_scores[1] * 0.3 +
-                                     project_scores[2] * 0.2)
-        return min(100, total_project_score)
-    def score_keywords_match(self, job_desc, resume):
-        """Score keyword matching with more generous scoring"""
-        job_domain = self.detect_job_domain(job_desc)
-        job_keywords = self.extract_contextual_keywords(job_desc, job_domain)
-        resume_keywords = self.extract_contextual_keywords(resume, job_domain)
-        if not job_keywords:
-            # More generous fallback using semantic similarity
-            return min(70, self.calculate_semantic_similarity(job_desc, resume) * 140)
-        matches = len(set(job_keywords).intersection(set(resume_keywords)))
-        if matches == 0:
-            # Give more credit for semantic similarity even with no exact matches
-            return min(35, self.calculate_semantic_similarity(job_desc, resume) * 80)
-        # Apply domain compatibility with more generous scoring
-        resume_domain = self.detect_resume_domain(resume)
-        domain_compatibility = self.calculate_domain_compatibility(job_domain, resume_domain)
-        base_score = (matches / len(job_keywords)) * 120
-        final_score = base_score * (0.7 + 0.3 * domain_compatibility)
-        return min(100, final_score)
-    def score_tools_tech(self, job_desc, resume):
-        """Score tools and technologies with more generous scoring"""
-        job_domain = self.detect_job_domain(job_desc)
-        resume_domain = self.detect_resume_domain(resume)
-        # Select relevant tech categories based on job domain
-        if job_domain == 'cybersecurity':
-            tech_categories = ['cybersecurity', 'programming']
-        elif job_domain == 'web_development':
-            tech_categories = ['web_development', 'programming', 'databases', 'cloud']
-        elif job_domain == 'mobile_development':
-            tech_categories = ['mobile_development', 'programming']
-        elif job_domain == 'data_science':
-            tech_categories = ['data_science', 'programming', 'databases']
-        elif job_domain == 'ui_ux_design':
-            tech_categories = ['ui_ux_design', 'web_development']
-        elif job_domain == 'business_analysis':
-            tech_categories = ['business_analysis', 'databases']
-        elif job_domain == 'marketing':
-            tech_categories = ['marketing', 'ui_ux_design']
-        elif job_domain == 'consultancy':
-            tech_categories = ['consultancy', 'business_analysis']
-        elif job_domain == 'ai_ml_engineering':
-            tech_categories = ['ai_ml_engineering', 'data_science', 'programming']
-        else:
-            tech_categories = ['programming', 'databases', 'cloud']
-        job_tech = set()
-        resume_tech = set()
-        for category in tech_categories:
             if category in self.skill_categories:
-                for tech in self.skill_categories[category]:
-                    if tech in job_desc.lower():
-                        job_tech.add(tech)
-                    if tech in resume.lower():
-                        resume_tech.add(tech)
-        if not job_tech:
-            # More generous fallback using semantic similarity
-            return min(60, self.calculate_semantic_similarity(job_desc, resume) * 120)
-        matches = len(job_tech.intersection(resume_tech))
-        if matches == 0:
-            # Give more credit for having any relevant tech
-            if resume_tech:
-                return min(40, len(resume_tech) * 8)
-            return 15  # Small base score instead of 0
-        # Apply domain compatibility with more generous scoring
-        domain_compatibility = self.calculate_domain_compatibility(job_domain, resume_domain)
-        base_score = (matches / len(job_tech)) * 120
-        final_score = base_score * (0.7 + 0.3 * domain_compatibility)
-        return min(100, final_score)
-    def score_soft_skills(self, job_desc, resume):
-        """Score soft skills indicators - Enhanced with interest-based inference"""
-        resume_lower = resume.lower()
-        job_lower = job_desc.lower()
-        # Direct soft skills mentioned
-        direct_soft_skills = self.skill_categories['soft_skills']
-        job_soft_count = sum(1 for skill in direct_soft_skills if skill in job_lower)
-        resume_soft_count = sum(1 for skill in direct_soft_skills if skill in resume_lower)
-        # Calculate direct soft skills score
-        if job_soft_count > 0:
-            direct_score = min(50, (resume_soft_count / job_soft_count) * 50)
-        else:
-            direct_score = min(40, resume_soft_count * 10)
-        # Inferred soft skills from interests and activities
-        inferred_skills = set()
-        resume_text = resume_lower
-        for skill_type, indicators in self.interest_skill_mapping.items():
-            skill_indicators_found = sum(1 for indicator in indicators if indicator in resume_text)
-            if skill_indicators_found > 0:
-                inferred_skills.add(skill_type)
-        # Map inferred skills to job requirements
-        job_skill_requirements = set()
-        if 'leadership' in job_lower or 'lead' in job_lower or 'manage' in job_lower:
-            job_skill_requirements.add('leadership')
-        if 'team' in job_lower or 'collaboration' in job_lower:
-            job_skill_requirements.add('teamwork')
-        if 'communication' in job_lower or 'present' in job_lower:
-            job_skill_requirements.add('communication')
-        if 'creative' in job_lower or 'innovation' in job_lower or 'design' in job_lower:
-            job_skill_requirements.add('creativity')
-        if 'problem' in job_lower or 'analytical' in job_lower or 'analysis' in job_lower:
-            job_skill_requirements.add('analytical')
-        if 'dedicated' in job_lower or 'commitment' in job_lower:
-            job_skill_requirements.add('dedication')
-        if 'adapt' in job_lower or 'flexible' in job_lower:
-            job_skill_requirements.add('adaptability')
-        # Score inferred skills
-        inferred_score = 0
-        if job_skill_requirements:
-            matched_inferred = job_skill_requirements.intersection(inferred_skills)
-            if matched_inferred:
-                inferred_score = (len(matched_inferred) / len(job_skill_requirements)) * 35
         else:
-            inferred_score = min(25, len(inferred_skills) * 5)
-        # Activity-based bonus scoring
-        activity_bonus = 0
-        high_value_activities = ['ncc', 'captain', 'president', 'volunteer', 'community service', 'marathon', 'debate']
-        activity_count = sum(1 for activity in high_value_activities if activity in resume_lower)
-        activity_bonus = min(15, activity_count * 3)
-        final_score = min(100, direct_score + inferred_score + activity_bonus)
-        return final_score
     def calculate_final_score(self, job_description, resume):
         """Calculate the weighted final score"""
         scores = {}
         # Calculate individual dimension scores
-        scores['relevant_skills'] = self.score_relevant_skills(job_description, resume)
-        scores['work_experience'] = self.score_work_experience(job_description, resume)
-        scores['education'] = self.score_education(job_description, resume)
-        scores['certifications'] = self.score_certifications(job_description, resume)
-        scores['projects'] = self.score_projects(job_description, resume)
-        scores['keywords_match'] = self.score_keywords_match(job_description, resume)
-        scores['tools_tech'] = self.score_tools_tech(job_description, resume)
-        scores['soft_skills'] = self.score_soft_skills(job_description, resume)
         # Calculate weighted final score
         final_score = sum(scores[dim] * self.weights[dim] for dim in scores)
         return final_score, scores
 # Initialize the scorer
 scorer = ATSScorer()
 def score_resume(job_description, resume_file, resume_text):
-    """Main function to score resume against job description"""
     if not job_description.strip():
-        return "Please provide a job description.", ""
     # Determine resume source
     resume_content = ""
@@ -937,31 +752,26 @@ def score_resume(job_description, resume_file, resume_text):
         try:
             resume_content = scorer.extract_text_from_file(resume_file)
             if not resume_content.strip():
-                return "Could not extract text from the uploaded file. Please check the file format.", ""
         except Exception as e:
-            return f"Error processing file: {str(e)}", ""
     elif resume_text.strip():
         resume_content = resume_text.strip()
     else:
-        return "Please provide either a resume file (PDF/DOCX) or paste resume text.", ""
     try:
         final_score, dimension_scores = scorer.calculate_final_score(job_description, resume_content)
-        # Detect domains for additional context
-        job_domain = scorer.detect_job_domain(job_description)
-        resume_domain = scorer.detect_resume_domain(resume_content)
-        domain_compatibility = scorer.calculate_domain_compatibility(job_domain, resume_domain)
-        # Create detailed breakdown
-        breakdown = f"""
 ## Overall ATS Score: {final_score:.1f}/100
-### Domain Analysis:
-- **Job Domain**: {job_domain.replace('_', ' ').title()}
-- **Resume Domain**: {resume_domain.replace('_', ' ').title()}
-- **Domain Compatibility**: {domain_compatibility:.1%}
 ### Dimension Breakdown:
 - **Relevant Skills** (25%): {dimension_scores['relevant_skills']:.1f}/100
 - **Work Experience** (20%): {dimension_scores['work_experience']:.1f}/100
@@ -978,32 +788,8 @@ def score_resume(job_description, resume_file, resume_text):
 - **56-75**: Good match
 - **45-55**: Fair match
 - **Below 40**: Poor match
-### Recommendations:
 """
-        # Add recommendations based on low scores and domain mismatch
-        recommendations = []
-        if domain_compatibility < 0.5:
-            recommendations.append(f"- **Domain Mismatch**: Your resume appears to be focused on {resume_domain.replace('_', ' ')} while the job is in {job_domain.replace('_', ' ')}. Consider highlighting transferable skills.")
-        if dimension_scores['relevant_skills'] < 70:
-            recommendations.append("- **Skills**: Add more job-specific technical skills to your resume")
-        if dimension_scores['work_experience'] < 70:
-            recommendations.append("- **Experience**: Highlight more relevant work experience or projects")
-        if dimension_scores['keywords_match'] < 70:
-            recommendations.append("- **Keywords**: Include more job-specific keywords throughout your resume")
-        if dimension_scores['tools_tech'] < 70:
-            recommendations.append("- **Technology**: Emphasize technical tools and technologies mentioned in the job description")
-        if dimension_scores['projects'] < 70:
-            recommendations.append("- **Projects**: Add more relevant projects that demonstrate required skills and use job-specific technologies")
-        if not recommendations:
-            recommendations.append("- **Excellent!** Your resume is well-aligned with the job requirements")
-        breakdown += "\n".join(recommendations)
         # Create score chart data
         chart_data = pd.DataFrame({
             'Dimension': [
@@ -1024,27 +810,19 @@ def score_resume(job_description, resume_file, resume_text):
             'Weight (%)': [25, 20, 10, 7, 10, 10, 10, 8]
         })
-        return breakdown, chart_data
     except Exception as e:
-        return f"Error processing resume: {str(e)}", ""
-# Create Gradio interface
-with gr.Blocks(title="ATS Resume Scorer", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
-    # 🎯 ATS Resume Scorer
-    This tool evaluates how well a resume matches a job description using 8 key dimensions:
-    - **Relevant Skills** (25%) - Match of skills to job requirements
-    - **Work Experience** (20%) - Years and relevance of experience
-    - **Education** (10%) - Degree relevance and performance
-    - **Certifications & Courses** (7%) - Additional qualifications
-    - **Projects** (10%) - Quality and relevance of projects
-    - **Keywords Match** (10%) - Job-specific keyword alignment
-    - **Tools & Technologies** (10%) - Technical proficiency
-    - **Soft Skills** (8%) - Leadership, teamwork, communication
-    **Supported Domains:** Web Development, Mobile Development, Data Science, Cybersecurity, DevOps, Game Development, UI/UX Design, Business Analysis, Marketing, Consultancy, AI/ML Engineering
     **📄 Resume Input:** Upload PDF/DOCX file OR paste text manually
     **📋 Job Description:** Paste as text
@@ -1078,109 +856,26 @@ with gr.Blocks(title="ATS Resume Scorer", theme=gr.themes.Soft()) as demo:
                     max_lines=15
                 )
-    score_btn = gr.Button("📊 Score Resume", variant="primary", size="lg")
     with gr.Row():
         with gr.Column():
-            score_output = gr.Markdown(label="Scoring Results")
         with gr.Column():
-            chart_output = gr.Dataframe(
-                label="Dimension Scores",
-                headers=['Dimension', 'Score', 'Weight (%)'],
-                datatype=['str', 'number', 'number']
-            )
-    # Example inputs
-    gr.Examples(
-        examples=[
-            [
-                """Frontend Developer - React.js
-                We are seeking a Frontend Developer with 2+ years of experience in React.js development.
-                Requirements:
-                - Bachelor's degree in Computer Science or related field
-                - Strong proficiency in JavaScript, HTML, CSS
-                - Experience with React.js, Redux, and modern frontend frameworks
-                - Knowledge of responsive design and cross-browser compatibility
-                - Experience with version control (Git)
-                - Understanding of RESTful APIs
-                - Strong problem-solving skills and attention to detail""",
-                None,  # No file upload in example
-                """John Smith
-                Frontend Developer
-                Education:
-                - Bachelor of Technology in Computer Science, ABC University (2020)
-                Experience:
-                - Frontend Developer at Tech Solutions (2021-2024, 3 years)
-                - Developed responsive web applications using React.js and Redux
-                - Collaborated with backend developers to integrate RESTful APIs
-                - Implemented modern CSS frameworks and ensured cross-browser compatibility
-                Skills:
-                - Frontend: JavaScript, HTML5, CSS3, React.js, Redux, Vue.js
-                - Tools: Git, Webpack, npm, VS Code
-                - Responsive Design, Cross-browser compatibility
-                - RESTful API integration
-                Projects:
-                - E-commerce Website: Built using React.js with Redux for state management
-                - Portfolio Dashboard: Responsive web application with modern UI/UX"""
-            ],
-            [
-                """UI/UX Designer - Product Design
-                We are seeking a UI/UX Designer with 2+ years of experience in product design and user research.
-                Requirements:
-                - Bachelor's degree in Design, HCI, or related field
-                - Strong proficiency in Figma, Sketch, and Adobe Creative Suite
-                - Experience with user research and usability testing
-                - Knowledge of design systems and prototyping
-                - Understanding of frontend technologies (HTML, CSS, JavaScript)
-                - Strong visual design and interaction design skills
-                - Experience with A/B testing and data-driven design
-                - Excellent communication and collaboration skills""",
-                None,  # No file upload in example
-                """Sarah Johnson
-                UI/UX Designer
-                Education:
-                - Bachelor of Fine Arts in Graphic Design, Art Institute (2020)
-                Experience:
-                - UI/UX Designer at Design Studio (2021-2024, 3 years)
-                - Created user interfaces and experiences for web and mobile applications
-                - Conducted user research and usability testing sessions
-                - Developed design systems and component libraries using Figma
-                - Collaborated with frontend developers on implementation
-                Skills:
-                - Design Tools: Figma, Sketch, Adobe XD, Photoshop, Illustrator
-                - Prototyping: InVision, Principle, Framer
-                - Research: User interviews, A/B testing, Analytics
-                - Frontend: HTML, CSS, JavaScript basics
-                - Design: Visual design, Interaction design, Wireframing
-                Projects:
-                - E-commerce Mobile App: Designed complete user experience with user research and prototyping
-                - SaaS Dashboard Redesign: Led design system creation and improved user engagement by 40%
-                Certifications:
-                - Google UX Design Certificate
-                - Figma Advanced Certification"""
-            ]
-        ],
-        inputs=[job_desc_input, resume_file_input, resume_text_input]
-    )
     score_btn.click(
         fn=score_resume,
         inputs=[job_desc_input, resume_file_input, resume_text_input],
-        outputs=[score_output, chart_output]
     )
 if __name__ == "__main__":

 import docx
 import io
 from pathlib import Path
+import os
+import google.generativeai as genai
+from typing import Dict, Any
+# Configure the Gemini client from the GEMINI_API_KEY environment variable (os.environ.get yields None when it is unset)
+genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
 class ATSScorer:
     def __init__(self):
             ]
         }
+    def analyze_cv(self, cv_text: str, job_description: str) -> Dict[str, Any]:
+        """
+        Analyze CV against job description using Gemini AI.
+        Builds a screening prompt, sends it to the 'gemini-2.0-flash-exp' model
+        and parses the span from the first "{" to the last "}" of the reply as JSON.
+        Args:
+            cv_text: Plain text of the candidate's resume.
+            job_description: Plain text of the job posting.
+        Returns:
+            {"success": True, "result": <parsed JSON>} on success, otherwise
+            {"success": False, "message": <reason>}. Exceptions raised while
+            calling the model or decoding the reply are caught, printed and
+            reported through "message".
+        """
+        # Imported locally so the method does not depend on a module-level import.
+        import json
+        try:
+            prompt = f"""You are a smart and unbiased AI CV screening assistant. Your task is to evaluate how well a candidate's resume (CV) matches a job description. The job description may include one or more roles and may contain responsibilities, expectations, and skill requirements.
+Carefully review both the CV and the Job Description, and provide the output as a **valid JSON object** with the following keys:
+1. **reasoning** (string): Provide a concise but insightful explanation of how well the candidate matches the job requirements — mention key matching points like role alignment, experience, and relevant technologies.
+2. **skills_available** (array of 6 or fewer strings): List up to 6 skills or competencies from the CV that strongly align with the job description.
+3. **missing** (array of 6 or fewer strings): List up to 6 important skills, experiences, or qualifications the candidate lacks based on the job description. If nothing is missing, return a single string in the array: "You are good to go".
+CV:
+\"\"\"
+{cv_text}
+\"\"\"
+Job Description:
+\"\"\"
+{job_description}
+\"\"\"
+"""
+            model = genai.GenerativeModel('gemini-2.0-flash-exp')
+            response = model.generate_content(prompt)
+            # The model may wrap the JSON in prose or code fences, so keep only
+            # the outermost brace-delimited span.
+            text = response.text
+            json_start = text.find("{")
+            json_end = text.rfind("}")
+            # rfind returns -1 when there is no closing brace, and the closing
+            # brace must follow the opening one for the slice to be an object.
+            if json_start == -1 or json_end < json_start:
+                return {"success": False, "message": "Could not parse JSON response"}
+            parsed_result = json.loads(text[json_start:json_end + 1])
+            return {"success": True, "result": parsed_result}
+        except Exception as e:
+            # Boundary with an external service: report the failure to the UI
+            # instead of letting it propagate.
+            print(f'Error analyzing CV: {e}')
+            return {"success": False, "message": f"Error: {str(e)}"}
+    def format_analysis_output(self, analysis_result: Dict[str, Any]) -> str:
+        """
+        Render an analysis result dict as Markdown for display in Gradio.
+        A falsy "success" yields a single error line built from "message";
+        otherwise the "reasoning", "skills_available" and "missing" entries of
+        "result" are laid out under three headings.
+        """
+        if not analysis_result.get("success"):
+            return f"❌ **Error:** {analysis_result.get('message', 'Unknown error')}"
+        data = analysis_result["result"]
+        parts = [
+            "## 📊 **AI-Powered CV Analysis**\n\n",
+            "### 🔍 **Analysis & Reasoning**\n",
+            f"{data.get('reasoning', 'No reasoning provided')}\n\n",
+            "### ✅ **Matching Skills Found**\n",
+        ]
+        matched = data.get('skills_available', [])
+        if matched:
+            parts.extend(f"• {entry}\n" for entry in matched)
+        else:
+            parts.append("• No matching skills identified\n")
+        parts.append("\n")
+        parts.append("### ⚠️ **Areas for Improvement**\n")
+        gaps = data.get('missing', [])
+        if not gaps:
+            parts.append("• No gaps identified\n")
+        elif len(gaps) == 1 and gaps[0] == "You are good to go":
+            # Sentinel value the prompt asks the model to use when nothing is missing.
+            parts.append("🎉 **Excellent! You are good to go!**\n")
+        else:
+            parts.extend(f"• {entry}\n" for entry in gaps)
+        return "".join(parts)
     def extract_text_from_pdf(self, pdf_file):
         """Extract text from PDF file"""
         try:
         if file is None:
             return ""
+        file_path = Path(file)
         file_extension = file_path.suffix.lower()
         try:
             if file_extension == '.pdf':
+                return self.extract_text_from_pdf(file)
             elif file_extension in ['.docx', '.doc']:
+                return self.extract_text_from_docx(file)
             else:
                 raise Exception(f"Unsupported file format: {file_extension}. Please upload PDF or DOCX files.")
         except Exception as e:
     def preprocess_text(self, text):
+        """Normalize text for keyword matching.
+        Lowercases the text, replaces every character other than word
+        characters, whitespace and - + # . with a space, then collapses
+        whitespace runs and trims the ends.
+        Returns:
+            The normalized string, or "" for None or empty input.
+        """
+        if not text:
+            return ""
+        # Strip punctuation first: each removed character becomes a space, so
+        # whitespace has to be collapsed afterwards or "a, b" would keep a
+        # double space and multi-word keywords would fail to match.
+        text = re.sub(r'[^\w\s\-\+\#\.]', ' ', text.lower())
+        text = re.sub(r'\s+', ' ', text)
+        return text.strip()
+    def extract_skills_from_text(self, text, domain=None):
+        """Return the skills from self.skill_categories that occur in text.
+        Matching is a case-insensitive substring test against the preprocessed
+        text. Skills of `domain` (when it is a known category) come first,
+        followed by the remaining categories in dictionary order; each skill
+        appears once.
+        Args:
+            text: Raw resume or job-description text.
+            domain: Optional key of self.skill_categories to list first.
+        Returns:
+            List of matched skill names, spelled as in skill_categories.
+        """
+        text = self.preprocess_text(text)
+        ordered_groups = []
+        if domain and domain in self.skill_categories:
+            ordered_groups.append(self.skill_categories[domain])
+        ordered_groups.extend(self.skill_categories.values())
+        # A dict keeps first-seen order and de-duplicates in O(1) per skill;
+        # list(set(...)) would throw away the domain-first ordering.
+        # NOTE(review): substring matching lets a short skill name match inside
+        # a longer word - confirm this is intended.
+        found = {}
+        for skills in ordered_groups:
+            for skill in skills:
+                if skill not in found and skill.lower() in text:
+                    found[skill] = None
+        return list(found)
+    def detect_domain(self, text):
+        """Detect the primary domain/field from text.
+        Each domain in self.domain_indicators is scored by substring hits in
+        the preprocessed text: 3 per high-priority keyword, 2 per
+        medium-priority keyword and 1 per low-priority keyword.
+        Returns:
+            The highest-scoring domain key (the first one on ties), or None
+            when no keyword of any domain occurs in the text or no domains
+            are configured.
+        """
+        text = self.preprocess_text(text)
+        tier_weights = (('high_priority', 3), ('medium_priority', 2), ('low_priority', 1))
+        domain_scores = {}
+        for domain, priorities in self.domain_indicators.items():
+            domain_scores[domain] = sum(
+                weight
+                for tier, weight in tier_weights
+                for keyword in priorities[tier]
+                if keyword in text
+            )
+        # Without this guard max() returns the first domain even when every
+        # score is 0, labelling unrelated text with an arbitrary domain.
+        if not domain_scores or max(domain_scores.values()) == 0:
+            return None
+        return max(domain_scores, key=domain_scores.get)
+    def calculate_relevant_skills_score(self, job_description, resume):
+        """Score how many of the job's detected skills also appear in the resume.
+        Returns 50 when no skill is detected in the job description. Otherwise
+        the matched share as a percentage (capped at 85) plus 3 points per
+        matched skill belonging to the job's detected domain (capped at 15),
+        limited to 100 overall.
+        """
+        domain = self.detect_domain(job_description)
+        required = self.extract_skills_from_text(job_description, domain)
+        offered = self.extract_skills_from_text(resume, domain)
+        if not required:
+            # Nothing to compare against, so fall back to a neutral score.
+            return 50
+        shared = set(required).intersection(offered)
+        coverage_points = min(85, len(shared) / len(required) * 100)
+        bonus = 0
+        if domain and domain in self.skill_categories:
+            in_domain = self.skill_categories[domain]
+            bonus = min(15, 3 * sum(1 for skill in shared if skill in in_domain))
+        return min(100, coverage_points + bonus)
+    def extract_experience_years(self, text):
+        """Extract years of experience from text.
+        Looks for explicit phrases ("5+ years of experience", "over 3 years")
+        and for year ranges ("2019 - 2023", "2020 to present"), and returns the
+        largest single figure found.
+        Returns:
+            int number of years, or 0 when nothing matches.
+        """
+        from datetime import date
+        # preprocess_text turns en/em dashes into spaces, which would hide
+        # ranges such as "2019 – 2023", so normalise them to "-" first.
+        text = (text or "").replace('–', '-').replace('—', '-')
+        text = self.preprocess_text(text)
+        phrase_patterns = [
+            r'(\d+)\+?\s*years?\s*(?:of\s*)?(?:work\s*)?experience',
+            r'experience\s*(?:of\s*)?(\d+)\+?\s*years?',
+            r'(\d+)\+?\s*years?\s*(?:in|of|with)',
+            r'over\s*(\d+)\s*years?',
+            r'more\s*than\s*(\d+)\s*years?'
+        ]
+        years = []
+        for pattern in phrase_patterns:
+            years.extend(int(match) for match in re.findall(pattern, text))
+        # Year ranges: the end is either a year or an open-ended marker. The
+        # text is already lowercase at this point.
+        range_pattern = r'(\d{4})\s*(?:-|to)\s*(\d{4}|present|current|now)'
+        # Use the real current year so open-ended ranges do not go stale.
+        current_year = date.today().year
+        for start, end in re.findall(range_pattern, text):
+            start_year = int(start)
+            end_year = int(end) if end.isdigit() else current_year
+            if end_year > start_year:
+                years.append(end_year - start_year)
+        return max(years) if years else 0
+    def calculate_work_experience_score(self, job_description, resume):
+        """Score the resume's years of experience against the job's requirement.
+        With no stated requirement the score is 40 plus 8 per experience-related
+        keyword found in the resume (capped at 80). Otherwise: 80 plus 2 per
+        surplus year (capped at 100) when the requirement is met, 70 at 70% or
+        more of it, 60 at 50% or more, else 30 plus 5 per keyword found.
+        """
+        required_years = self.extract_experience_years(job_description)
+        candidate_years = self.extract_experience_years(resume)
+        lowered = resume.lower()
+        markers = ('experience', 'worked', 'employed', 'position', 'role', 'job', 'internship', 'intern')
+        hits = len([marker for marker in markers if marker in lowered])
+        if required_years == 0:
+            # No explicit requirement: reward evidence of any work history.
+            return min(80, 40 + hits * 8)
+        if candidate_years >= required_years:
+            surplus = candidate_years - required_years
+            return min(100, 80 + surplus * 2)
+        if candidate_years >= required_years * 0.7:
+            return 70
+        if candidate_years >= required_years * 0.5:
+            return 60
+        # hits is never negative, so this is already at least 30.
+        return 30 + hits * 5
+    def calculate_education_score(self, job_description, resume):
+        """Calculate education score.
+        Adds up: 20 if any configured degree type appears in the resume, 10 per
+        education keyword found (capped at 30), 15 for an undergraduate
+        pattern, 10 for a year indicator and 20 when the name of the job's
+        detected domain appears in the resume. The result is clamped to 40..100.
+        """
+        resume_lower = resume.lower()
+        patterns = self.education_patterns
+        degree_score = 20 if any(d in resume_lower for d in patterns['degree_types']) else 0
+        education_mentions = sum(1 for keyword in self.education_keywords if keyword in resume_lower)
+        education_score = min(30, education_mentions * 10)
+        undergraduate_score = 15 if any(p in resume_lower for p in patterns['undergraduate']) else 0
+        year_score = 10 if any(y in resume_lower for y in patterns['year_indicators']) else 0
+        # The domain key itself (e.g. "data_science") is searched for, once
+        # with the underscore replaced by a space and once with it removed.
+        field_bonus = 0
+        domain = self.detect_domain(job_description)
+        if domain:
+            domain_keywords = (domain.replace('_', ' '), domain.replace('_', ''))
+            if any(keyword in resume_lower for keyword in domain_keywords):
+                field_bonus = 20
+        total_score = degree_score + education_score + undergraduate_score + year_score + field_bonus
+        return min(100, max(40, total_score))
+    def calculate_certifications_score(self, job_description, resume):
+        """Calculate certifications score.
+        Base: 15 per certification keyword found plus 10 per matching phrase
+        pattern ("certified X", "X certification", ...), capped at 60. When the
+        job's detected domain is 'cybersecurity' or 'cloud', each listed
+        certification name found in the resume adds 15. The result is clamped
+        to 40..100.
+        """
+        resume_lower = resume.lower()
+        cert_mentions = sum(1 for keyword in self.certification_keywords if keyword in resume_lower)
+        cert_patterns = (
+            r'certified\s+\w+',
+            r'\w+\s+certification',
+            r'\w+\s+certificate',
+            r'licensed\s+\w+',
+            r'accredited\s+\w+',
+        )
+        pattern_matches = sum(1 for pattern in cert_patterns if re.search(pattern, resume_lower))
+        domain_certs = {
+            'cybersecurity': ['cissp', 'ceh', 'oscp', 'comptia', 'security+'],
+            'cloud': ['aws', 'azure', 'gcp', 'cloud practitioner'],
+        }
+        domain = self.detect_domain(job_description)
+        domain_cert_bonus = 15 * sum(1 for cert in domain_certs.get(domain, ()) if cert in resume_lower)
+        base_score = min(60, cert_mentions * 15 + pattern_matches * 10)
+        # Always include the domain bonus: gating it on generic wording pinned a
+        # resume listing e.g. CISSP, CEH and OSCP without the word "certified"
+        # to the floor of 40.
+        return max(40, min(100, base_score + domain_cert_bonus))
+    def categorize_projects(self, project_text):
+        """Return the project categories whose keywords occur in project_text.
+        The text is preprocessed first; a category from self.project_categories
+        is listed once if any of its keywords is a substring of the result.
+        """
+        normalized = self.preprocess_text(project_text)
+        return [
+            name
+            for name, terms in self.project_categories.items()
+            if any(term in normalized for term in terms)
+        ]
+    def calculate_projects_score(self, job_description, resume):
+        """Score the resume's project content and its relevance to the job.
+        Components: 8 per project keyword found (capped at 50), 20 when a
+        projects heading phrase is present, 3 per detected project category
+        (capped at 15) and 25 when the job's detected domain is one of those
+        categories. The sum is clamped to 30..100.
+        """
+        lowered = resume.lower()
+        keyword_hits = len([kw for kw in self.project_keywords if kw in lowered])
+        section_markers = ('projects', 'personal projects', 'academic projects', 'work projects')
+        section_bonus = 0
+        for marker in section_markers:
+            if marker in lowered:
+                section_bonus = 20
+                break
+        found_categories = self.categorize_projects(resume)
+        domain = self.detect_domain(job_description)
+        # Relevance only counts when a project category is named like the domain.
+        relevance_bonus = 25 if domain and domain in found_categories else 0
+        keyword_points = min(50, keyword_hits * 8)
+        category_bonus = min(15, len(found_categories) * 3)
+        combined = keyword_points + section_bonus + category_bonus + relevance_bonus
+        return min(100, max(30, combined))
+    def calculate_keywords_match_score(self, job_description, resume):
+        """Score keyword alignment between the job description and the resume.
+        Primary path: cosine similarity of self.sentence_model embeddings of
+        the two preprocessed texts (as a percentage) plus up to 20 points for
+        the share of non-common job words present in the resume; the result is
+        kept within 30..100. If anything in that path raises, the error is
+        printed and plain word overlap on the lowercased raw texts is used
+        instead (50 when the job description has no words).
+        """
+        try:
+            jd_clean = self.preprocess_text(job_description)
+            cv_clean = self.preprocess_text(resume)
+            jd_vector = self.sentence_model.encode([jd_clean])
+            cv_vector = self.sentence_model.encode([cv_clean])
+            semantic_points = cosine_similarity(jd_vector, cv_vector)[0][0] * 100
+            # Common words carry no signal, so they are left out of the overlap.
+            common_words = {'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'can', 'must', 'shall', 'a', 'an', 'this', 'that', 'these', 'those'}
+            jd_terms = set(jd_clean.split()) - common_words
+            cv_terms = set(cv_clean.split()) - common_words
+            overlap_bonus = len(jd_terms & cv_terms) / len(jd_terms) * 20 if jd_terms else 0
+            return max(30, min(100, semantic_points + overlap_bonus))
+        except Exception as e:
+            print(f"Error in keyword matching: {e}")
+            # Fallback: share of job words that also occur in the resume.
+            jd_terms = set(job_description.lower().split())
+            cv_terms = set(resume.lower().split())
+            if not jd_terms:
+                return 50
+            shared_fraction = len(jd_terms & cv_terms) / len(jd_terms)
+            return min(100, max(30, shared_fraction * 100))
+    def calculate_tools_tech_score(self, job_description, resume):
+        """Calculate tools and technology score.
+        Compares the technical skills (those listed under the technical
+        categories of self.skill_categories) detected in the job description
+        with those detected in the resume.
+        Returns:
+            60 when the job names no technical skill; otherwise 85 times the
+            matched share plus 2 per resume skill beyond the job's count
+            (bonus capped at 15), kept within 40..100.
+        """
+        technical_categories = ['programming', 'databases', 'cloud', 'web_development', 'mobile_development', 'data_science', 'cybersecurity', 'ai_ml_engineering']
+        # Collect into a set: a skill listed under several categories must
+        # count once, otherwise duplicates inflate the denominator of the match
+        # ratio and distort the extra-tools bonus.
+        technical_skills = set()
+        for category in technical_categories:
+            technical_skills.update(self.skill_categories.get(category, ()))
+        job_tech_skills = technical_skills.intersection(self.extract_skills_from_text(job_description))
+        resume_tech_skills = technical_skills.intersection(self.extract_skills_from_text(resume))
+        if not job_tech_skills:
+            return 60  # Default score if no technical skills in job description
+        matching_tools = job_tech_skills & resume_tech_skills
+        tool_match_ratio = len(matching_tools) / len(job_tech_skills)
+        # Bonus for having more tools than required
+        extra_tools_bonus = min(15, max(0, len(resume_tech_skills) - len(job_tech_skills)) * 2)
+        base_score = tool_match_ratio * 85
+        final_score = min(100, base_score + extra_tools_bonus)
+        return max(40, final_score)
+    def infer_soft_skills(self, text):
+        """Infer soft skills from interests and activities mentioned in text.
+        A skill from self.interest_skill_mapping is returned once if any of its
+        indicator phrases is a substring of the preprocessed text.
+        """
+        normalized = self.preprocess_text(text)
+        return [
+            skill
+            for skill, cues in self.interest_skill_mapping.items()
+            if any(cue in normalized for cue in cues)
+        ]
+    def calculate_soft_skills_score(self, job_description, resume):
+        """Calculate soft skills score.
+        Resume soft skills are those from self.skill_categories['soft_skills']
+        found in the resume plus the ones inferred by infer_soft_skills.
+        Returns:
+            With no soft skill named in the job description: 50 plus 5 per
+            resume soft skill, capped at 80. Otherwise 70 times the matched
+            share of the job's soft skills plus 3 per resume soft skill (bonus
+            capped at 20), kept within 50..100.
+        """
+        # Lowercase once instead of once per catalogue entry.
+        job_lower = job_description.lower()
+        resume_lower = resume.lower()
+        catalogue = self.skill_categories['soft_skills']
+        job_soft_skills = [skill for skill in catalogue if skill in job_lower]
+        resume_soft_skills = {skill for skill in catalogue if skill in resume_lower}
+        # Combine direct and inferred skills
+        all_resume_soft_skills = resume_soft_skills | set(self.infer_soft_skills(resume))
+        if not job_soft_skills:
+            # If no specific soft skills mentioned in job, give credit for having any
+            return min(80, 50 + len(all_resume_soft_skills) * 5)
+        # job_soft_skills is non-empty from here on, so the ratio is always defined.
+        matching_soft_skills = all_resume_soft_skills.intersection(job_soft_skills)
+        soft_skill_ratio = len(matching_soft_skills) / len(job_soft_skills)
+        # Bonus for having diverse soft skills
+        diversity_bonus = min(20, len(all_resume_soft_skills) * 3)
+        base_score = soft_skill_ratio * 70
+        final_score = min(100, base_score + diversity_bonus)
+        return max(50, final_score)
     def calculate_final_score(self, job_description, resume):
+        """Run every dimension scorer and combine the results.
+        Returns a (final_score, scores) tuple: scores maps each dimension name
+        to its individual score, and final_score is the sum of each score
+        multiplied by its entry in self.weights.
+        """
+        dimension_scorers = (
+            ('relevant_skills', self.calculate_relevant_skills_score),
+            ('work_experience', self.calculate_work_experience_score),
+            ('education', self.calculate_education_score),
+            ('certifications', self.calculate_certifications_score),
+            ('projects', self.calculate_projects_score),
+            ('keywords_match', self.calculate_keywords_match_score),
+            ('tools_tech', self.calculate_tools_tech_score),
+            ('soft_skills', self.calculate_soft_skills_score),
+        )
+        scores = {name: score_fn(job_description, resume) for name, score_fn in dimension_scorers}
+        final_score = sum(value * self.weights[name] for name, value in scores.items())
+        return final_score, scores
 # Module-level ATSScorer instance, created once at import time; score_resume calls its methods
 scorer = ATSScorer()
 def score_resume(job_description, resume_file, resume_text):
+    """Enhanced function to score resume and provide AI analysis"""
     if not job_description.strip():
+        return "Please provide a job description.", "", ""
     # Determine resume source
     resume_content = ""
         try:
             resume_content = scorer.extract_text_from_file(resume_file)
             if not resume_content.strip():
+                return "Could not extract text from the uploaded file. Please check the file format.", "", ""
         except Exception as e:
+            return f"Error processing file: {str(e)}", "", ""
     elif resume_text.strip():
         resume_content = resume_text.strip()
     else:
+        return "Please provide either a resume file (PDF/DOCX) or paste resume text.", "", ""
     try:
+        # Get ATS score
         final_score, dimension_scores = scorer.calculate_final_score(job_description, resume_content)
+        # Get AI analysis
+        analysis_result = scorer.analyze_cv(resume_content, job_description)
+        ai_analysis = scorer.format_analysis_output(analysis_result)
+        # Create ATS breakdown
+        ats_breakdown = f"""
 ## Overall ATS Score: {final_score:.1f}/100
 ### Dimension Breakdown:
 - **Relevant Skills** (25%): {dimension_scores['relevant_skills']:.1f}/100
 - **Work Experience** (20%): {dimension_scores['work_experience']:.1f}/100
 - **56-75**: Good match
 - **45-55**: Fair match
 - **Below 40**: Poor match
 """
         # Create score chart data
         chart_data = pd.DataFrame({
             'Dimension': [
             'Weight (%)': [25, 20, 10, 7, 10, 10, 10, 8]
         })
+        return ats_breakdown, ai_analysis, chart_data
     except Exception as e:
+        return f"Error processing resume: {str(e)}", "", ""
+# Create Enhanced Gradio interface
+with gr.Blocks(title="Enhanced ATS Resume Scorer", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
+    # 🎯 Enhanced ATS Resume Scorer with AI Analysis
+    This tool provides **dual analysis** of your resume:
+    1. **ATS Score** - Technical matching across 8 dimensions
+    2. **AI Analysis** - Intelligent insights and recommendations
     **📄 Resume Input:** Upload PDF/DOCX file OR paste text manually
     **📋 Job Description:** Paste as text
                     max_lines=15
                 )
+    score_btn = gr.Button("🚀 Analyze Resume", variant="primary", size="lg")
     with gr.Row():
         with gr.Column():
+            ats_output = gr.Markdown(label="ATS Scoring Results")
         with gr.Column():
+            ai_output = gr.Markdown(label="AI Analysis Results")
+    with gr.Row():
+        chart_output = gr.Dataframe(
+            label="Dimension Scores",
+            headers=['Dimension', 'Score', 'Weight (%)'],
+            datatype=['str', 'number', 'number']
+        )
     score_btn.click(
         fn=score_resume,
         inputs=[job_desc_input, resume_file_input, resume_text_input],
+        outputs=[ats_output, ai_output, chart_output]
     )
 if __name__ == "__main__":