import gradio as gr
from transformers import pipeline
import pandas as pd
import spacy
import re
from pathlib import Path
import PyPDF2
import docx
import json

# Load models
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    from spacy.cli import download
    download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

keyword_extractor = pipeline("token-classification", model="jean-baptiste/roberta-large-ner-english")
# Note: `classifier` is loaded here but not referenced anywhere below.
classifier = pipeline("text-classification", model="microsoft/MiniLM-L12-H384-uncased")

def extract_text_from_resume(file):
    file_path = file.name
    text = ""
    if file_path.endswith('.pdf'):
        with open(file_path, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            for page in pdf_reader.pages:
                # extract_text() can return None for image-only pages
                text += page.extract_text() or ""
    elif file_path.endswith('.docx'):
        doc = docx.Document(file_path)
        for paragraph in doc.paragraphs:
            text += paragraph.text + '\n'
    elif file_path.endswith('.txt'):
        with open(file_path, 'r', encoding='utf-8') as txt_file:
            text = txt_file.read()
    return text.strip()
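
# Note: files with any other extension fall through all of the branches above,
# so extract_text_from_resume returns an empty string and the rest of the
# pipeline simply sees an empty resume.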

def extract_information(text):
    doc = nlp(text)
    entities = {
        "skills": [],
        "education": [],
        "experience": [],
        "contact": []
    }

    # Extract skills (using a predefined list of common skills)
    common_skills = ["python", "java", "javascript", "sql", "machine learning", "data analysis"]
    text_lower = text.lower()
    entities["skills"] = [skill for skill in common_skills if skill in text_lower]

    # Extract education
    education_keywords = ["university", "college", "bachelor", "master", "phd", "degree"]
    for sent in doc.sents:
        if any(keyword in sent.text.lower() for keyword in education_keywords):
            entities["education"].append(sent.text.strip())

    # Extract experience
    experience_keywords = ["experience", "work", "job", "position", "role"]
    for sent in doc.sents:
        if any(keyword in sent.text.lower() for keyword in experience_keywords):
            entities["experience"].append(sent.text.strip())

    # Extract contact information (emails and US-style phone numbers)
    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    phone_pattern = r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b'
    emails = re.findall(email_pattern, text)
    phones = re.findall(phone_pattern, text)
    entities["contact"] = emails + phones

    return entities
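
# Example (hypothetical): for a short resume snippet such as
# "Jane Doe. jane@example.com. Skilled in Python and SQL. B.S. degree from
# Example University.", extract_information would return roughly
#   {"skills": ["python", "sql"],
#    "education": ["B.S. degree from Example University."],
#    "experience": [],
#    "contact": ["jane@example.com"]}
# (the exact sentence strings depend on spaCy's sentence segmentation).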

def analyze_resume(text, entities):
    scores = {
        "completeness": 0,
        "skills_match": 0,
        "formatting": 0,
        "keyword_optimization": 0
    }

    # Completeness score: one point for each populated section
    score_components = 0
    if entities["skills"]: score_components += 1
    if entities["education"]: score_components += 1
    if entities["experience"]: score_components += 1
    if entities["contact"]: score_components += 1
    scores["completeness"] = (score_components / 4) * 100

    # Skills match score
    desired_skills = ["python", "java", "javascript", "sql", "machine learning"]
    matched_skills = sum(1 for skill in entities["skills"] if skill in desired_skills)
    scores["skills_match"] = (matched_skills / len(desired_skills)) * 100

    # Formatting score: simple heuristics worth 20 points each
    formatting_score = 0
    if len(text.split('\n')) > 5: formatting_score += 20
    if len(text) > 200: formatting_score += 20
    if any(char.isupper() for char in text): formatting_score += 20
    if re.search(r'\b\d{4}\b', text): formatting_score += 20
    if len(re.findall(r'[.!?]', text)) > 3: formatting_score += 20
    scores["formatting"] = formatting_score

    # Keyword optimization score: count the entities the NER pipeline tags
    # in the first 512 characters
    keywords = keyword_extractor(text[:512])
    scores["keyword_optimization"] = min(len(keywords) * 10, 100)

    return scores
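
# Example (hypothetical): a resume with all four sections populated and three
# of the five desired skills would score completeness=100.0 and
# skills_match=60.0; formatting and keyword_optimization depend on the layout
# of the text and on how many entities the NER model finds in the first 512
# characters.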

def generate_recommendations(scores, entities):
    recommendations = []
    if scores["completeness"] < 75:
        recommendations.append("📋 Add more sections to your resume to improve completeness.")
        if not entities["skills"]:
            recommendations.append("- Add a skills section")
        if not entities["education"]:
            recommendations.append("- Add education details")
        if not entities["experience"]:
            recommendations.append("- Add work experience")
        if not entities["contact"]:
            recommendations.append("- Add contact information")
    if scores["skills_match"] < 60:
        recommendations.append("\n💡 Consider adding more relevant skills:")
        recommendations.append("- Focus on technical skills like Python, Java, SQL")
        recommendations.append("- Include both hard and soft skills")
    if scores["formatting"] < 80:
        recommendations.append("\n📝 Improve resume formatting:")
        recommendations.append("- Use clear section headings")
        recommendations.append("- Include dates for experiences")
        recommendations.append("- Use bullet points for better readability")
    if scores["keyword_optimization"] < 70:
        recommendations.append("\n🔍 Optimize keyword usage:")
        recommendations.append("- Use more industry-specific terms")
        recommendations.append("- Include action verbs")
        recommendations.append("- Mention specific technologies and tools")
    return "\n".join(recommendations)

def process_resume(file):
    if file is None:
        return {}, "Please upload a resume file."
    text = extract_text_from_resume(file)
    entities = extract_information(text)
    scores = analyze_resume(text, entities)
    recommendations = generate_recommendations(scores, entities)
    return scores, recommendations

def create_interface():
    with gr.Blocks() as app:
        gr.Markdown("""
        # Resume Analyzer and Optimizer
        Upload your resume to get personalized analysis and recommendations.
        """)
        with gr.Row():
            file_input = gr.File(
                label="Upload Resume (PDF, DOCX, or TXT)",
                file_types=[".pdf", ".docx", ".txt"]
            )
        with gr.Row():
            analyze_button = gr.Button("Analyze Resume", variant="primary")
        with gr.Row():
            with gr.Column():
                score_output = gr.JSON(label="Analysis Scores")
            with gr.Column():
                recommendations_output = gr.Textbox(
                    label="Recommendations",
                    lines=10
                )
        analyze_button.click(
            fn=process_resume,
            inputs=[file_input],
            outputs=[score_output, recommendations_output]
        )
    return app

if __name__ == "__main__":
    app = create_interface()
    app.launch()
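
# Note: this listing assumes the Space also ships a requirements.txt with at
# least gradio, transformers, torch, spacy, pandas, PyPDF2 and python-docx;
# exact pinned versions are not shown here.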