Talha812 committed
Commit 36680cc · verified · 1 Parent(s): 469df2a

Update app.py

Files changed (1)
  1. app.py +237 -51
app.py CHANGED
@@ -1,33 +1,174 @@
+# import streamlit as st
+# import re
+# import os
+# import faiss
+# import numpy as np
+# from PyPDF2 import PdfReader
+# from docx import Document
+# import spacy
+# from sentence_transformers import SentenceTransformer
+# from groq import Groq
+
+# # Load NLP Model
+# try:
+#     nlp = spacy.load("en_core_web_sm")
+# except OSError:
+#     from spacy.cli import download
+#     download("en_core_web_sm")
+#     nlp = spacy.load("en_core_web_sm")
+
+# # Load Sentence Transformer Model
+# similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
+
+# # Initialize Groq API Client
+# client = Groq(api_key=os.environ["GROQ_API_KEY"])
+
+# def extract_text(file):
+#     """Extract text from PDF, DOCX, or TXT file."""
+#     if file.name.endswith('.pdf'):
+#         reader = PdfReader(file)
+#         return " ".join([page.extract_text() for page in reader.pages if page.extract_text()])
+#     elif file.name.endswith('.docx'):
+#         doc = Document(file)
+#         return " ".join([para.text for para in doc.paragraphs])
+#     elif file.name.endswith('.txt'):
+#         return file.read().decode()
+#     return ""
+
+# def extract_contact_info(text):
+#     """Extract phone numbers and emails."""
+#     phone_pattern = r'\b(?:\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b'
+#     email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
+
+#     return {
+#         'phone': re.findall(phone_pattern, text)[0] if re.findall(phone_pattern, text) else 'Not found',
+#         'email': re.findall(email_pattern, text)[0] if re.findall(email_pattern, text) else 'Not found'
+#     }
+
+# def extract_name(text):
+#     """Extract candidate name using NER."""
+#     doc = nlp(text)
+#     for ent in doc.ents:
+#         if ent.label_ == 'PERSON':
+#             return ent.text
+#     return "Not found"
+
+# def analyze_sections(text):
+#     """Identify resume sections."""
+#     sections = {'experience': [], 'skills': [], 'education': [], 'certifications': []}
+#     section_keywords = {
+#         'experience': ['experience', 'work history', 'employment'],
+#         'skills': ['skills', 'competencies', 'technologies'],
+#         'education': ['education', 'academic background'],
+#         'certifications': ['certifications', 'licenses', 'courses']
+#     }
+#     current_section = None
+
+#     for line in text.split('\n'):
+#         line_lower = line.strip().lower()
+#         for section, keywords in section_keywords.items():
+#             if any(keyword in line_lower for keyword in keywords):
+#                 current_section = section
+#                 break
+#         else:
+#             if current_section and line.strip():
+#                 sections[current_section].append(line.strip())
+
+#     return {k: '\n'.join(v) if v else 'Not found' for k, v in sections.items()}
+
+# def create_faiss_index(resume_text, jd_text):
+#     """Create FAISS index for similarity retrieval."""
+#     embeddings = similarity_model.encode([resume_text, jd_text])
+#     index = faiss.IndexFlatL2(embeddings.shape[1])
+#     index.add(np.array([embeddings[0]]))  # Add resume embedding
+#     distance, _ = index.search(np.array([embeddings[1]]), 1)
+#     return float((1 - distance[0][0]) * 100)  # Convert to percentage similarity
+
+# def generate_interview_questions(resume_text, jd_text):
+#     """Generate interview questions."""
+#     prompt = f"Generate 5 technical interview questions based on Resume and Job Description:\n\nResume: {resume_text[:1000]}\nJob Description: {jd_text[:500]}"
+#     response = client.chat.completions.create(
+#         messages=[{"role": "user", "content": prompt}],
+#         model="deepseek-r1-distill-qwen-32b",
+#     )
+#     return response.choices[0].message.content if response.choices else "No questions generated."
+
+# # Streamlit UI Configuration
+# st.set_page_config(page_title="AI Resume Analyzer", layout="wide")
+
+# st.title("🧠 AI-Powered Resume Analyzer")
+# st.markdown("Analyze resumes, match job requirements, and generate interview questions instantly!")
+
+# col1, col2 = st.columns([2, 3])
+# with col1:
+#     uploaded_file = st.file_uploader("Upload Resume (PDF/DOCX/TXT)", type=['pdf', 'docx', 'txt'])
+# with col2:
+#     jd_input = st.text_area("Paste Job Description", height=200)
+
+# if st.button("Process Resume"):
+#     if uploaded_file and jd_input:
+#         resume_text = extract_text(uploaded_file)
+#         if resume_text:
+#             st.subheader("📌 Candidate Profile")
+#             name = extract_name(resume_text)
+#             contact = extract_contact_info(resume_text)
+#             st.write(f"**Name:** {name}\n\n**Phone:** {contact['phone']}\n\n**Email:** {contact['email']}")
+
+#             sections = analyze_sections(resume_text)
+#             st.subheader("📂 Resume Sections")
+#             with st.expander("Experience"): st.write(sections['experience'])
+#             with st.expander("Education"): st.write(sections['education'])
+#             with st.expander("Skills"): st.write(sections['skills'])
+#             with st.expander("Certifications"): st.write(sections['certifications'])
+
+#             st.subheader("📊 Job Compatibility")
+#             match_score = create_faiss_index(resume_text, jd_input)
+#             st.metric("Match Percentage", f"{match_score:.1f}%")
+#             st.progress(match_score / 100)
+
+#             st.subheader("❓ Suggested Interview Questions")
+#             questions = generate_interview_questions(resume_text, jd_input)
+#             for i, q in enumerate(questions.split("\n")[:5]):
+#                 st.write(f"{i+1}. {q.strip()}")
+#     else:
+#         st.warning("⚠️ Please upload a resume and enter a job description before processing.")
+
+# st.markdown("---")
+# st.markdown("🔹 Built with Streamlit, FAISS & Groq AI")
+
+
+
+import os
 import streamlit as st
 import re
-import os
-import faiss
-import numpy as np
+import json
 from PyPDF2 import PdfReader
 from docx import Document
 import spacy
-from sentence_transformers import SentenceTransformer
+from sentence_transformers import SentenceTransformer, util
 from groq import Groq
 
-# Load NLP Model
+# Initialize NLP components
 try:
     nlp = spacy.load("en_core_web_sm")
 except OSError:
     from spacy.cli import download
     download("en_core_web_sm")
     nlp = spacy.load("en_core_web_sm")
+# st.error("Please install the SpaCy English model: 'python -m spacy download en_core_web_sm'")
+# st.stop()
 
-# Load Sentence Transformer Model
+# Initialize models
 similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
 
-# Initialize Groq API Client
+# Initialize Groq API client
 client = Groq(api_key=os.environ["GROQ_API_KEY"])
 
 def extract_text(file):
-    """Extract text from PDF, DOCX, or TXT file."""
+    """Extract text from various file formats"""
     if file.name.endswith('.pdf'):
         reader = PdfReader(file)
-        return " ".join([page.extract_text() for page in reader.pages if page.extract_text()])
+        return " ".join([page.extract_text() for page in reader.pages])
     elif file.name.endswith('.docx'):
         doc = Document(file)
         return " ".join([para.text for para in doc.paragraphs])
@@ -36,7 +177,7 @@ def extract_text(file):
     return ""
 
 def extract_contact_info(text):
-    """Extract phone numbers and emails."""
+    """Extract phone numbers and emails using regex patterns"""
    phone_pattern = r'\b(?:\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b'
    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
 
@@ -46,7 +187,7 @@ def extract_contact_info(text):
     }
 
 def extract_name(text):
-    """Extract candidate name using NER."""
+    """Extract candidate name using SpaCy NER"""
     doc = nlp(text)
     for ent in doc.ents:
         if ent.label_ == 'PERSON':
@@ -54,18 +195,26 @@ def extract_name(text):
     return "Not found"
 
 def analyze_sections(text):
-    """Identify resume sections."""
-    sections = {'experience': [], 'skills': [], 'education': [], 'certifications': []}
+    """Parse resume sections using rule-based approach"""
+    sections = {
+        'experience': [],
+        'skills': [],
+        'education': [],
+        'certifications': []
+    }
+
+    current_section = None
     section_keywords = {
         'experience': ['experience', 'work history', 'employment'],
         'skills': ['skills', 'competencies', 'technologies'],
        'education': ['education', 'academic background'],
         'certifications': ['certifications', 'licenses', 'courses']
     }
-    current_section = None
 
     for line in text.split('\n'):
         line_lower = line.strip().lower()
+
+        # Detect section headers
         for section, keywords in section_keywords.items():
             if any(keyword in line_lower for keyword in keywords):
                 current_section = section
@@ -76,62 +225,99 @@ def analyze_sections(text):
 
     return {k: '\n'.join(v) if v else 'Not found' for k, v in sections.items()}
 
-def create_faiss_index(resume_text, jd_text):
-    """Create FAISS index for similarity retrieval."""
+def calculate_similarity(resume_text, jd_text):
+    """Calculate semantic similarity between resume and JD"""
     embeddings = similarity_model.encode([resume_text, jd_text])
-    index = faiss.IndexFlatL2(embeddings.shape[1])
-    index.add(np.array([embeddings[0]]))  # Add resume embedding
-    distance, _ = index.search(np.array([embeddings[1]]), 1)
-    return float((1 - distance[0][0]) * 100)  # Convert to percentage similarity
+    return util.pytorch_cos_sim(embeddings[0], embeddings[1]).item() * 100
 
 def generate_interview_questions(resume_text, jd_text):
-    """Generate interview questions."""
-    prompt = f"Generate 5 technical interview questions based on Resume and Job Description:\n\nResume: {resume_text[:1000]}\nJob Description: {jd_text[:500]}"
+    """Generate interview questions using Groq API"""
+    input_text = f"Generate 5 technical interview questions based on resume and job description.\nResume: {resume_text[:1000]}\nJob Description: {jd_text[:500]}"
+
     response = client.chat.completions.create(
-        messages=[{"role": "user", "content": prompt}],
-        model="deepseek-r1-distill-qwen-32b",
+        messages=[
+            {"role": "user", "content": input_text}
+        ],
+        model="llama-3.3-70b-versatile",
     )
-    return response.choices[0].message.content if response.choices else "No questions generated."
+
+    return response.choices[0].message.content if response.choices else "Could not generate questions."
 
 # Streamlit UI Configuration
 st.set_page_config(page_title="AI Resume Analyzer", layout="wide")
 
-st.title("🧠 AI-Powered Resume Analyzer")
-st.markdown("Analyze resumes, match job requirements, and generate interview questions instantly!")
+# Main Application
+st.title("AI-Powered Resume Analyzer 🧠")
+st.markdown("""
+Upload a candidate's resume and paste the job description to get:
+- Candidate profile analysis
+- Job requirement matching
+- Automated interview questions
+""")
 
-col1, col2 = st.columns([2, 3])
-with col1:
-    uploaded_file = st.file_uploader("Upload Resume (PDF/DOCX/TXT)", type=['pdf', 'docx', 'txt'])
-with col2:
-    jd_input = st.text_area("Paste Job Description", height=200)
+# File Upload and JD Input
+with st.container():
+    col1, col2 = st.columns([2, 3])
+
+    with col1:
+        uploaded_file = st.file_uploader(
+            "Upload Resume (PDF/DOCX/TXT)",
+            type=['pdf', 'docx', 'txt'],
+            help="Supported formats: PDF, Word, Text"
+        )
+
+    with col2:
+        jd_input = st.text_area(
+            "Paste Job Description",
+            height=200,
+            placeholder="Paste the complete job description here..."
+        )
 
 if st.button("Process Resume"):
     if uploaded_file and jd_input:
         resume_text = extract_text(uploaded_file)
+
        if resume_text:
-            st.subheader("📌 Candidate Profile")
-            name = extract_name(resume_text)
-            contact = extract_contact_info(resume_text)
-            st.write(f"**Name:** {name}\n\n**Phone:** {contact['phone']}\n\n**Email:** {contact['email']}")
+            # Candidate Profile Section
+            st.header("👀 Candidate Profile")
+            profile_col1, profile_col2 = st.columns([1, 2])
 
-            sections = analyze_sections(resume_text)
-            st.subheader("📂 Resume Sections")
-            with st.expander("Experience"): st.write(sections['experience'])
-            with st.expander("Education"): st.write(sections['education'])
-            with st.expander("Skills"): st.write(sections['skills'])
-            with st.expander("Certifications"): st.write(sections['certifications'])
+            with profile_col1:
+                st.subheader("Basic Information")
+                name = extract_name(resume_text)
+                contact = extract_contact_info(resume_text)
+
+                st.markdown(f"""
+                **Name:** {name}
+                **Phone:** {contact['phone']}
+                **Email:** {contact['email']}
+                """)
 
-            st.subheader("📊 Job Compatibility")
-            match_score = create_faiss_index(resume_text, jd_input)
-            st.metric("Match Percentage", f"{match_score:.1f}%")
-            st.progress(match_score / 100)
+            with profile_col2:
+                st.subheader("Professional Summary")
+                sections = analyze_sections(resume_text)
+
+                exp_col, edu_col = st.columns(2)
+                with exp_col:
+                    with st.expander("Work Experience"):
+                        st.write(sections['experience'])
+
+                with edu_col:
+                    with st.expander("Education"):
+                        st.write(sections['education'])
 
-            st.subheader("❓ Suggested Interview Questions")
+            # Job Matching Analysis
+            st.header("📊 Job Compatibility Analysis")
+            match_score = calculate_similarity(resume_text, jd_input)
+            st.metric("Match Percentage", f"{match_score:.1f}%")
+
+            # Interview Questions
+            st.header("❓ Suggested Interview Questions")
             questions = generate_interview_questions(resume_text, jd_input)
-            for i, q in enumerate(questions.split("\n")[:5]):
-                st.write(f"{i+1}. {q.strip()}")
+
+            st.write(questions)
     else:
-        st.warning("⚠️ Please upload a resume and enter a job description before processing.")
+        st.info("👆 Please upload a resume and enter a job description to begin analysis")
 
 st.markdown("---")
-st.markdown("🔹 Built with Streamlit, FAISS & Groq AI")
+st.markdown("Built with ♥ using [Streamlit](https://streamlit.io) | [Hugging Face](https://huggingface.co) | [Spacy](https://spacy.io) | FAISS | Groq AI")