Talha812 committed
Commit 36680cc · verified · 1 Parent(s): 469df2a

Update app.py

Files changed (1)
  1. app.py +237 -51
app.py CHANGED
@@ -1,33 +1,174 @@
+# import streamlit as st
+# import re
+# import os
+# import faiss
+# import numpy as np
+# from PyPDF2 import PdfReader
+# from docx import Document
+# import spacy
+# from sentence_transformers import SentenceTransformer
+# from groq import Groq
+
+# # Load NLP Model
+# try:
+#     nlp = spacy.load("en_core_web_sm")
+# except OSError:
+#     from spacy.cli import download
+#     download("en_core_web_sm")
+#     nlp = spacy.load("en_core_web_sm")
+
+# # Load Sentence Transformer Model
+# similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
+
+# # Initialize Groq API Client
+# client = Groq(api_key=os.environ["GROQ_API_KEY"])
+
+# def extract_text(file):
+#     """Extract text from PDF, DOCX, or TXT file."""
+#     if file.name.endswith('.pdf'):
+#         reader = PdfReader(file)
+#         return " ".join([page.extract_text() for page in reader.pages if page.extract_text()])
+#     elif file.name.endswith('.docx'):
+#         doc = Document(file)
+#         return " ".join([para.text for para in doc.paragraphs])
+#     elif file.name.endswith('.txt'):
+#         return file.read().decode()
+#     return ""
+
+# def extract_contact_info(text):
+#     """Extract phone numbers and emails."""
+#     phone_pattern = r'\b(?:\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b'
+#     email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
+
+#     return {
+#         'phone': re.findall(phone_pattern, text)[0] if re.findall(phone_pattern, text) else 'Not found',
+#         'email': re.findall(email_pattern, text)[0] if re.findall(email_pattern, text) else 'Not found'
+#     }
+
+# def extract_name(text):
+#     """Extract candidate name using NER."""
+#     doc = nlp(text)
+#     for ent in doc.ents:
+#         if ent.label_ == 'PERSON':
+#             return ent.text
+#     return "Not found"
+
+# def analyze_sections(text):
+#     """Identify resume sections."""
+#     sections = {'experience': [], 'skills': [], 'education': [], 'certifications': []}
+#     section_keywords = {
+#         'experience': ['experience', 'work history', 'employment'],
+#         'skills': ['skills', 'competencies', 'technologies'],
+#         'education': ['education', 'academic background'],
+#         'certifications': ['certifications', 'licenses', 'courses']
+#     }
+#     current_section = None
+
+#     for line in text.split('\n'):
+#         line_lower = line.strip().lower()
+#         for section, keywords in section_keywords.items():
+#             if any(keyword in line_lower for keyword in keywords):
+#                 current_section = section
+#                 break
+#         else:
+#             if current_section and line.strip():
+#                 sections[current_section].append(line.strip())
+
+#     return {k: '\n'.join(v) if v else 'Not found' for k, v in sections.items()}
+
+# def create_faiss_index(resume_text, jd_text):
+#     """Create FAISS index for similarity retrieval."""
+#     embeddings = similarity_model.encode([resume_text, jd_text])
+#     index = faiss.IndexFlatL2(embeddings.shape[1])
+#     index.add(np.array([embeddings[0]]))  # Add resume embedding
+#     distance, _ = index.search(np.array([embeddings[1]]), 1)
+#     return float((1 - distance[0][0]) * 100)  # Convert to percentage similarity
+
+# def generate_interview_questions(resume_text, jd_text):
+#     """Generate interview questions."""
+#     prompt = f"Generate 5 technical interview questions based on Resume and Job Description:\n\nResume: {resume_text[:1000]}\nJob Description: {jd_text[:500]}"
+#     response = client.chat.completions.create(
+#         messages=[{"role": "user", "content": prompt}],
+#         model="deepseek-r1-distill-qwen-32b",
+#     )
+#     return response.choices[0].message.content if response.choices else "No questions generated."
+
+# # Streamlit UI Configuration
+# st.set_page_config(page_title="AI Resume Analyzer", layout="wide")
+
+# st.title("🧠 AI-Powered Resume Analyzer")
+# st.markdown("Analyze resumes, match job requirements, and generate interview questions instantly!")
+
+# col1, col2 = st.columns([2, 3])
+# with col1:
+#     uploaded_file = st.file_uploader("Upload Resume (PDF/DOCX/TXT)", type=['pdf', 'docx', 'txt'])
+# with col2:
+#     jd_input = st.text_area("Paste Job Description", height=200)
+
+# if st.button("Process Resume"):
+#     if uploaded_file and jd_input:
+#         resume_text = extract_text(uploaded_file)
+#         if resume_text:
+#             st.subheader("📌 Candidate Profile")
+#             name = extract_name(resume_text)
+#             contact = extract_contact_info(resume_text)
+#             st.write(f"**Name:** {name}\n\n**Phone:** {contact['phone']}\n\n**Email:** {contact['email']}")
+
+#             sections = analyze_sections(resume_text)
+#             st.subheader("📂 Resume Sections")
+#             with st.expander("Experience"): st.write(sections['experience'])
+#             with st.expander("Education"): st.write(sections['education'])
+#             with st.expander("Skills"): st.write(sections['skills'])
+#             with st.expander("Certifications"): st.write(sections['certifications'])
+
+#             st.subheader("📊 Job Compatibility")
+#             match_score = create_faiss_index(resume_text, jd_input)
+#             st.metric("Match Percentage", f"{match_score:.1f}%")
+#             st.progress(match_score / 100)
+
+#             st.subheader("❓ Suggested Interview Questions")
+#             questions = generate_interview_questions(resume_text, jd_input)
+#             for i, q in enumerate(questions.split("\n")[:5]):
+#                 st.write(f"{i+1}. {q.strip()}")
+#     else:
+#         st.warning("⚠️ Please upload a resume and enter a job description before processing.")
+
+# st.markdown("---")
+# st.markdown("🔹 Built with Streamlit, FAISS & Groq AI")
+
+
+
+import os
 import streamlit as st
 import re
-import os
-import faiss
-import numpy as np
+import json
 from PyPDF2 import PdfReader
 from docx import Document
 import spacy
-from sentence_transformers import SentenceTransformer
+from sentence_transformers import SentenceTransformer, util
 from groq import Groq
 
-# Load NLP Model
+# Initialize NLP components
 try:
     nlp = spacy.load("en_core_web_sm")
 except OSError:
     from spacy.cli import download
     download("en_core_web_sm")
     nlp = spacy.load("en_core_web_sm")
+# st.error("Please install the SpaCy English model: 'python -m spacy download en_core_web_sm'")
+# st.stop()
 
-# Load Sentence Transformer Model
+# Initialize models
 similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
 
-# Initialize Groq API Client
+# Initialize Groq API client
 client = Groq(api_key=os.environ["GROQ_API_KEY"])
 
 def extract_text(file):
-    """Extract text from PDF, DOCX, or TXT file."""
+    """Extract text from various file formats"""
     if file.name.endswith('.pdf'):
         reader = PdfReader(file)
-        return " ".join([page.extract_text() for page in reader.pages if page.extract_text()])
+        return " ".join([page.extract_text() for page in reader.pages])
     elif file.name.endswith('.docx'):
         doc = Document(file)
         return " ".join([para.text for para in doc.paragraphs])
@@ -36,7 +177,7 @@ def extract_text(file):
     return ""
 
 def extract_contact_info(text):
-    """Extract phone numbers and emails."""
+    """Extract phone numbers and emails using regex patterns"""
    phone_pattern = r'\b(?:\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b'
    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
 
@@ -46,7 +187,7 @@ def extract_contact_info(text):
     }
 
 def extract_name(text):
-    """Extract candidate name using NER."""
+    """Extract candidate name using SpaCy NER"""
     doc = nlp(text)
     for ent in doc.ents:
         if ent.label_ == 'PERSON':
@@ -54,18 +195,26 @@ def extract_name(text):
     return "Not found"
 
 def analyze_sections(text):
-    """Identify resume sections."""
-    sections = {'experience': [], 'skills': [], 'education': [], 'certifications': []}
+    """Parse resume sections using rule-based approach"""
+    sections = {
+        'experience': [],
+        'skills': [],
+        'education': [],
+        'certifications': []
+    }
+
+    current_section = None
     section_keywords = {
         'experience': ['experience', 'work history', 'employment'],
         'skills': ['skills', 'competencies', 'technologies'],
        'education': ['education', 'academic background'],
         'certifications': ['certifications', 'licenses', 'courses']
     }
-    current_section = None
 
     for line in text.split('\n'):
         line_lower = line.strip().lower()
+
+        # Detect section headers
         for section, keywords in section_keywords.items():
             if any(keyword in line_lower for keyword in keywords):
                 current_section = section
@@ -76,62 +225,99 @@ def analyze_sections(text):
 
     return {k: '\n'.join(v) if v else 'Not found' for k, v in sections.items()}
 
-def create_faiss_index(resume_text, jd_text):
-    """Create FAISS index for similarity retrieval."""
+def calculate_similarity(resume_text, jd_text):
+    """Calculate semantic similarity between resume and JD"""
     embeddings = similarity_model.encode([resume_text, jd_text])
-    index = faiss.IndexFlatL2(embeddings.shape[1])
-    index.add(np.array([embeddings[0]]))  # Add resume embedding
-    distance, _ = index.search(np.array([embeddings[1]]), 1)
-    return float((1 - distance[0][0]) * 100)  # Convert to percentage similarity
+    return util.pytorch_cos_sim(embeddings[0], embeddings[1]).item() * 100
 
 def generate_interview_questions(resume_text, jd_text):
-    """Generate interview questions."""
-    prompt = f"Generate 5 technical interview questions based on Resume and Job Description:\n\nResume: {resume_text[:1000]}\nJob Description: {jd_text[:500]}"
+    """Generate interview questions using Groq API"""
+    input_text = f"Generate 5 technical interview questions based on resume and job description.\nResume: {resume_text[:1000]}\nJob Description: {jd_text[:500]}"
+
     response = client.chat.completions.create(
-        messages=[{"role": "user", "content": prompt}],
-        model="deepseek-r1-distill-qwen-32b",
+        messages=[
+            {"role": "user", "content": input_text}
+        ],
+        model="llama-3.3-70b-versatile",
     )
-    return response.choices[0].message.content if response.choices else "No questions generated."
+
+    return response.choices[0].message.content if response.choices else "Could not generate questions."
 
 # Streamlit UI Configuration
 st.set_page_config(page_title="AI Resume Analyzer", layout="wide")
 
-st.title("🧠 AI-Powered Resume Analyzer")
-st.markdown("Analyze resumes, match job requirements, and generate interview questions instantly!")
+# Main Application
+st.title("AI-Powered Resume Analyzer 🧠")
+st.markdown("""
+Upload a candidate's resume and paste the job description to get:
+- Candidate profile analysis
+- Job requirement matching
+- Automated interview questions
+""")
 
-col1, col2 = st.columns([2, 3])
-with col1:
-    uploaded_file = st.file_uploader("Upload Resume (PDF/DOCX/TXT)", type=['pdf', 'docx', 'txt'])
-with col2:
-    jd_input = st.text_area("Paste Job Description", height=200)
+# File Upload and JD Input
+with st.container():
+    col1, col2 = st.columns([2, 3])
+
+    with col1:
+        uploaded_file = st.file_uploader(
+            "Upload Resume (PDF/DOCX/TXT)",
+            type=['pdf', 'docx', 'txt'],
+            help="Supported formats: PDF, Word, Text"
+        )
+
+    with col2:
+        jd_input = st.text_area(
+            "Paste Job Description",
+            height=200,
+            placeholder="Paste the complete job description here..."
+        )
 
 if st.button("Process Resume"):
     if uploaded_file and jd_input:
         resume_text = extract_text(uploaded_file)
+
        if resume_text:
-            st.subheader("📌 Candidate Profile")
-            name = extract_name(resume_text)
-            contact = extract_contact_info(resume_text)
-            st.write(f"**Name:** {name}\n\n**Phone:** {contact['phone']}\n\n**Email:** {contact['email']}")
+            # Candidate Profile Section
+            st.header("👀 Candidate Profile")
+            profile_col1, profile_col2 = st.columns([1, 2])
 
-            sections = analyze_sections(resume_text)
-            st.subheader("📂 Resume Sections")
-            with st.expander("Experience"): st.write(sections['experience'])
-            with st.expander("Education"): st.write(sections['education'])
-            with st.expander("Skills"): st.write(sections['skills'])
-            with st.expander("Certifications"): st.write(sections['certifications'])
+            with profile_col1:
+                st.subheader("Basic Information")
+                name = extract_name(resume_text)
+                contact = extract_contact_info(resume_text)
+
+                st.markdown(f"""
+                **Name:** {name}
+                **Phone:** {contact['phone']}
+                **Email:** {contact['email']}
+                """)
 
-            st.subheader("📊 Job Compatibility")
-            match_score = create_faiss_index(resume_text, jd_input)
-            st.metric("Match Percentage", f"{match_score:.1f}%")
-            st.progress(match_score / 100)
+            with profile_col2:
+                st.subheader("Professional Summary")
+                sections = analyze_sections(resume_text)
+
+                exp_col, edu_col = st.columns(2)
+                with exp_col:
+                    with st.expander("Work Experience"):
+                        st.write(sections['experience'])
+
+                with edu_col:
+                    with st.expander("Education"):
+                        st.write(sections['education'])
 
-            st.subheader("❓ Suggested Interview Questions")
+            # Job Matching Analysis
+            st.header("📊 Job Compatibility Analysis")
+            match_score = calculate_similarity(resume_text, jd_input)
+            st.metric("Match Percentage", f"{match_score:.1f}%")
+
+            # Interview Questions
+            st.header("❓ Suggested Interview Questions")
             questions = generate_interview_questions(resume_text, jd_input)
-            for i, q in enumerate(questions.split("\n")[:5]):
-                st.write(f"{i+1}. {q.strip()}")
+
+            st.write(questions)
     else:
-        st.warning("⚠️ Please upload a resume and enter a job description before processing.")
+        st.info("👆 Please upload a resume and enter a job description to begin analysis")
 
 st.markdown("---")
-st.markdown("🔹 Built with Streamlit, FAISS & Groq AI")
+st.markdown("Built with ♥ using [Streamlit](https://streamlit.io) | [Hugging Face](https://huggingface.co) | [Spacy](https://spacy.io) | FAISS | Groq AI")