import streamlit as st
import re
from PyPDF2 import PdfReader
from docx import Document
import spacy
from sentence_transformers import SentenceTransformer, util
import os
from groq import Groq

# Initialize Groq API client
client = Groq(api_key=os.environ["GROQ_API_KEY"])
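# Assumes GROQ_API_KEY is set in the environment (e.g. `export GROQ_API_KEY=...`);
# os.environ[...] raises KeyError at startup if the variable is missing.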

# Load the SpaCy English model, downloading it on first run if it is missing
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    from spacy.cli import download
    download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Load the sentence-embedding model once; st.cache_resource keeps it in memory
# across Streamlit reruns, which re-execute this script from the top.
@st.cache_resource
def load_similarity_model():
    return SentenceTransformer('all-MiniLM-L6-v2')

similarity_model = load_similarity_model()

def extract_text(file):
    """Extract text from various file formats"""
    if file.name.endswith('.pdf'):
        reader = PdfReader(file)
        # extract_text() can return None for image-only pages; fall back to ""
        return " ".join([(page.extract_text() or "") for page in reader.pages])
    elif file.name.endswith('.docx'):
        doc = Document(file)
        return " ".join([para.text for para in doc.paragraphs])
    elif file.name.endswith('.txt'):
        return file.read().decode("utf-8", errors="ignore")
    return ""

def extract_contact_info(text):
    """Extract phone numbers and emails using regex"""
    phone_pattern = r'\+?\d{1,3}[-.\s]?\(?\d{2,4}\)?[-.\s]?\d{3,4}[-.\s]?\d{4}'
    email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    
    phones = re.findall(phone_pattern, text)
    emails = re.findall(email_pattern, text)
    
    return {
        'phone': ', '.join(phones) if phones else 'Not found',
        'email': ', '.join(emails) if emails else 'Not found'
    }
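# Illustrative check of the patterns above (hypothetical values):
#   extract_contact_info("Call +1-555-123-4567 or write to [email protected]")
#   -> {'phone': '+1-555-123-4567', 'email': '[email protected]'}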


def extract_name(text):
    """Extract candidate name using SpaCy NER"""
    doc = nlp(text)
    for ent in doc.ents:
        if ent.label_ == 'PERSON':
            return ent.text
    return "Not found"

def analyze_sections(text):
    """Parse resume sections using rule-based approach"""
    sections = {
        'experience': [],
        'skills': [],
        'education': [],
        'certifications': []
    }
    
    current_section = None
    section_keywords = {
        'experience': ['experience', 'work history', 'employment'],
        'skills': ['skills', 'competencies', 'technologies'],
        'education': ['education', 'academic background'],
        'certifications': ['certifications', 'licenses', 'courses']
    }
    
    for line in text.split('\n'):
        line_lower = line.strip().lower()
        
        # Detect section headers
        for section, keywords in section_keywords.items():
            if any(keyword in line_lower for keyword in keywords):
                current_section = section
                break
        else:
            if current_section and line.strip():
                sections[current_section].append(line.strip())
    
    return {k: '\n'.join(v) if v else 'Not found' for k, v in sections.items()}
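# Illustrative behavior on a hypothetical resume snippet:
#   analyze_sections("Skills\nPython, SQL\nEducation\nB.Sc. Computer Science")
#   -> {'experience': 'Not found', 'skills': 'Python, SQL',
#       'education': 'B.Sc. Computer Science', 'certifications': 'Not found'}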

def calculate_similarity(resume_text, jd_text):
    """Calculate semantic similarity between resume and JD"""
    embeddings = similarity_model.encode([resume_text, jd_text])
    return util.pytorch_cos_sim(embeddings[0], embeddings[1]).item() * 100
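# Cosine similarity of the two embeddings lies in [-1, 1]; real resume/JD
# pairs are almost always positive, so multiplying by 100 gives a
# percentage-like score (e.g. a cosine of 0.62 is reported as 62.0).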

def generate_interview_questions(resume_text, jd_text):
    """Generate interview questions using the Groq API"""
    input_text = (
        "Generate interview questions based on the following resume and job description.\n"
        f"Resume: {resume_text}\n"
        f"Job description: {jd_text}\n"
        "Return concise, to-the-point questions only; do not describe the resume "
        "or the job description."
    )

    response = client.chat.completions.create(
        messages=[
            {"role": "user", "content": input_text}
        ],
        model="deepseek-r1-distill-llama-70b"
    )
    content = response.choices[0].message.content
    # DeepSeek-R1 distill models typically wrap their reasoning in
    # <think>...</think> tags; strip that so only the questions remain.
    return re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL).strip()
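# The model name above must match one of Groq's currently hosted models; if it
# is ever retired, any Groq chat model (e.g. "llama-3.3-70b-versatile") works here.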

# Streamlit UI Configuration
st.set_page_config(page_title="AI Resume Analyzer", layout="wide")

# Main Application
st.title("AI-Powered Resume Analyzer 🧠")
st.markdown("""
    Upload a candidate's resume and paste the job description to get:
    - Candidate profile analysis
    - Job requirement matching
    - Automated interview questions
""")

# File Upload and JD Input
with st.container():
    col1, col2 = st.columns([2, 3])
    
    with col1:
        uploaded_file = st.file_uploader(
            "Upload Resume (PDF/DOCX/TXT)",
            type=['pdf', 'docx', 'txt'],
            help="Supported formats: PDF, Word, Text"
        )
    
    with col2:
        jd_input = st.text_area(
            "Paste Job Description",
            height=200,
            placeholder="Paste the complete job description here..."
        )

if st.button("Process Resume"):
    if uploaded_file and jd_input:
        resume_text = extract_text(uploaded_file)
        
        if resume_text:
            # Candidate Profile Section
            st.header("👤 Candidate Profile")
            profile_col1, profile_col2 = st.columns([1, 2])
            
            with profile_col1:
                st.subheader("Basic Information")
                name = extract_name(resume_text)
                contact = extract_contact_info(resume_text)
                
                st.markdown(f"""
                    **Name:** {name}  
                    **Phone:** {contact['phone']}  
                    **Email:** {contact['email']}
                """)
            
            with profile_col2:
                st.subheader("Professional Summary")
                sections = analyze_sections(resume_text)
                
                exp_col, edu_col = st.columns(2)
                with exp_col:
                    with st.expander("Work Experience"):
                        st.write(sections['experience'])
                
                with edu_col:
                    with st.expander("Education"):
                        st.write(sections['education'])
                
                skills_col, cert_col = st.columns(2)
                with skills_col:
                    with st.expander("Skills"):
                        st.write(sections['skills'])
                
                with cert_col:
                    with st.expander("Certifications"):
                        st.write(sections['certifications'])
    
            # Job Matching Analysis
            st.header("📊 Job Compatibility Analysis")
            match_score = calculate_similarity(resume_text, jd_input)
            
            col1, col2 = st.columns([1, 3])
            with col1:
                st.metric("Match Percentage", f"{match_score:.1f}%")
            
            with col2:
                st.progress(max(0.0, min(match_score / 100, 1.0)))
                st.caption("Semantic similarity score between resume content and job description")
    
            # Interview Questions
            st.header("❓ Suggested Interview Questions")
            questions = generate_interview_questions(resume_text, jd_input)
            
            if questions:
                st.write(questions)
            else:
                st.warning("Could not generate questions. Please try with more detailed inputs.")
    
    else:
        st.info("👆 Please upload a resume and enter a job description to begin analysis")

# Footer
st.markdown("---")
st.markdown("Built with β™₯ using [Streamlit](https://streamlit.io) | [Hugging Face](https://huggingface.co) | [Spacy](https://spacy.io) | FAISS | Groq AI")