# "Spaces: Running" — Hugging Face Spaces page-status header captured by the
# scrape; not part of the application code.
# Standard library
import json
import re
from pathlib import Path

# Third-party
import docx
import gradio as gr
import pandas as pd
import PyPDF2
import spacy
from transformers import pipeline

# --- Model loading ---------------------------------------------------------
# Load spaCy's small English model, downloading it on first run if the
# package is not installed locally.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    from spacy.cli import download
    download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# HuggingFace pipelines: an NER model used for keyword detection, and a
# small text classifier (loaded here but not referenced elsewhere in
# this file — NOTE(review): candidate for removal; confirm no external use).
keyword_extractor = pipeline("token-classification", model="jean-baptiste/roberta-large-ner-english")
classifier = pipeline("text-classification", model="microsoft/MiniLM-L12-H384-uncased")
def extract_text_from_resume(file):
    """Extract plain text from an uploaded resume file.

    Supports PDF, DOCX and TXT files, matched case-insensitively on the
    file extension. Any other extension yields an empty string.

    Args:
        file: Uploaded-file object exposing a ``name`` attribute holding
            the path on disk (as provided by ``gr.File``).

    Returns:
        str: Extracted text stripped of surrounding whitespace; "" when
        the file type is unsupported or no text could be extracted.
    """
    file_path = file.name
    # Lower-case the suffix so ".PDF" / ".Docx" uploads are handled too.
    suffix = Path(file_path).suffix.lower()
    text = ""
    if suffix == '.pdf':
        with open(file_path, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            for page in pdf_reader.pages:
                # extract_text() can return None (e.g. image-only pages);
                # guard so the concatenation never raises TypeError.
                text += page.extract_text() or ""
    elif suffix == '.docx':
        doc = docx.Document(file_path)
        for paragraph in doc.paragraphs:
            text += paragraph.text + '\n'
    elif suffix == '.txt':
        with open(file_path, 'r', encoding='utf-8') as txt_file:
            text = txt_file.read()
    return text.strip()
def extract_information(text):
    """Pull structured resume fields out of raw resume text.

    Args:
        text: Full resume text.

    Returns:
        dict: Keys ``"skills"`` (matched skill names), ``"education"`` and
        ``"experience"`` (sentences containing the respective keywords) and
        ``"contact"`` (email addresses followed by phone numbers).
    """
    doc = nlp(text)
    entities = {
        "skills": [],
        "education": [],
        "experience": [],
        "contact": []
    }
    # Extract skills from a predefined list of common skills.
    # Word-boundary anchors prevent false positives such as "java"
    # matching inside "javascript".
    common_skills = ["python", "java", "javascript", "sql", "machine learning", "data analysis"]
    text_lower = text.lower()
    entities["skills"] = [
        skill for skill in common_skills
        if re.search(r'\b' + re.escape(skill) + r'\b', text_lower)
    ]
    # Sentence-level keyword scans for education and experience. A sentence
    # mentioning both kinds of keyword is intentionally listed under both.
    education_keywords = ["university", "college", "bachelor", "master", "phd", "degree"]
    experience_keywords = ["experience", "work", "job", "position", "role"]
    for sent in doc.sents:
        sent_lower = sent.text.lower()
        if any(keyword in sent_lower for keyword in education_keywords):
            entities["education"].append(sent.text.strip())
        if any(keyword in sent_lower for keyword in experience_keywords):
            entities["experience"].append(sent.text.strip())
    # Extract contact information: emails first, then phone numbers.
    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
    phone_pattern = r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b'
    emails = re.findall(email_pattern, text)
    phones = re.findall(phone_pattern, text)
    entities["contact"] = emails + phones
    return entities
def analyze_resume(text, entities):
    """Score the resume along four axes, each in the range 0-100.

    Args:
        text: Raw resume text.
        entities: Entity dict produced by ``extract_information``.

    Returns:
        dict: ``"completeness"``, ``"skills_match"``, ``"formatting"`` and
        ``"keyword_optimization"`` scores.
    """
    # Completeness: 25 points for each non-empty section.
    section_hits = sum(
        1 for section in ("skills", "education", "experience", "contact")
        if entities[section]
    )
    # Skills match: fraction of the desired skill list that was found.
    desired_skills = ["python", "java", "javascript", "sql", "machine learning"]
    matched = sum(1 for skill in entities["skills"] if skill in desired_skills)
    # Formatting: 20 points per satisfied heuristic.
    formatting_checks = [
        len(text.split('\n')) > 5,                # more than a handful of lines
        len(text) > 200,                          # enough overall content
        any(char.isupper() for char in text),     # some capitalisation present
        bool(re.search(r'\b\d{4}\b', text)),      # a 4-digit number (e.g. a year)
        len(re.findall(r'[.!?]', text)) > 3,      # several complete sentences
    ]
    # Keyword optimization: 10 points per model-detected token entity in the
    # first 512 characters, capped at 100.
    keywords = keyword_extractor(text[:512])
    return {
        "completeness": (section_hits / 4) * 100,
        "skills_match": (matched / len(desired_skills)) * 100,
        "formatting": sum(20 for check in formatting_checks if check),
        "keyword_optimization": min(len(keywords) * 10, 100),
    }
def generate_recommendations(scores, entities):
    """Turn analysis scores into human-readable improvement tips.

    Args:
        scores: Score dict from ``analyze_resume``.
        entities: Entity dict from ``extract_information``.

    Returns:
        str: Newline-joined recommendation lines; empty string when every
        score clears its threshold.
    """
    # NOTE: the section headers contain garbled characters (apparently lost
    # emoji encoding); they are reproduced unchanged on purpose.
    tips = []
    if scores["completeness"] < 75:
        tips.append("π Add more sections to your resume to improve completeness.")
        section_tips = [
            ("skills", "- Add a skills section"),
            ("education", "- Add education details"),
            ("experience", "- Add work experience"),
            ("contact", "- Add contact information"),
        ]
        tips.extend(tip for key, tip in section_tips if not entities[key])
    if scores["skills_match"] < 60:
        tips += [
            "\nπ‘ Consider adding more relevant skills:",
            "- Focus on technical skills like Python, Java, SQL",
            "- Include both hard and soft skills",
        ]
    if scores["formatting"] < 80:
        tips += [
            "\nπ Improve resume formatting:",
            "- Use clear section headings",
            "- Include dates for experiences",
            "- Use bullet points for better readability",
        ]
    if scores["keyword_optimization"] < 70:
        tips += [
            "\nπ Optimize keywords usage:",
            "- Use more industry-specific terms",
            "- Include action verbs",
            "- Mention specific technologies and tools",
        ]
    return "\n".join(tips)
def process_resume(file):
    """Run the full pipeline: file -> text -> entities -> scores + advice.

    Args:
        file: Uploaded file object from the Gradio ``File`` component.

    Returns:
        tuple: ``(scores_dict, recommendations_str)`` matching the two
        interface outputs.
    """
    resume_text = extract_text_from_resume(file)
    extracted = extract_information(resume_text)
    analysis_scores = analyze_resume(resume_text, extracted)
    advice = generate_recommendations(analysis_scores, extracted)
    return analysis_scores, advice
def create_interface():
    """Build the Gradio Blocks UI for the resume analyzer.

    Returns:
        gr.Blocks: The assembled (not yet launched) application.
    """
    with gr.Blocks() as demo:
        gr.Markdown("""
        # Resume Analyzer and Optimizer
        Upload your resume to get personalized analysis and recommendations.
        """)
        with gr.Row():
            resume_file = gr.File(
                label="Upload Resume (PDF, DOCX, or TXT)",
                file_types=["pdf", "docx", "txt"]
            )
        with gr.Row():
            run_button = gr.Button("Analyze Resume", variant="primary")
        with gr.Row():
            with gr.Column():
                scores_json = gr.JSON(label="Analysis Scores")
            with gr.Column():
                advice_box = gr.Textbox(
                    label="Recommendations",
                    lines=10
                )
        # Wire the button to the end-to-end processing pipeline.
        run_button.click(
            fn=process_resume,
            inputs=[resume_file],
            outputs=[scores_json, advice_box]
        )
    return demo
if __name__ == "__main__":
    # Build and serve the Gradio app when executed as a script.
    create_interface().launch()