Spaces:

Talha812
/

AI-Powered-RecruitmentAssistant

Sleeping

App Files Community

Talha812 committed on Feb 16

Commit

8948c5a

verified ·

1 Parent(s): 36680cc

Update app.py

Browse files

Files changed (1) hide show

app.py +280 -59

app.py CHANGED Viewed

@@ -1,33 +1,174 @@
 # import streamlit as st
 # import re
-# import os
-# import faiss
-# import numpy as np
 # from PyPDF2 import PdfReader
 # from docx import Document
 # import spacy
-# from sentence_transformers import SentenceTransformer
 # from groq import Groq
-# # Load NLP Model
 # try:
 #     nlp = spacy.load("en_core_web_sm")
 # except OSError:
 #     from spacy.cli import download
 #     download("en_core_web_sm")
 #     nlp = spacy.load("en_core_web_sm")
-# # Load Sentence Transformer Model
 # similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
-# # Initialize Groq API Client
 # client = Groq(api_key=os.environ["GROQ_API_KEY"])
 # def extract_text(file):
-#     """Extract text from PDF, DOCX, or TXT file."""
 #     if file.name.endswith('.pdf'):
 #         reader = PdfReader(file)
-#         return " ".join([page.extract_text() for page in reader.pages if page.extract_text()])
 #     elif file.name.endswith('.docx'):
 #         doc = Document(file)
 #         return " ".join([para.text for para in doc.paragraphs])
@@ -36,7 +177,7 @@
 #     return ""
 # def extract_contact_info(text):
-#     """Extract phone numbers and emails."""
 #     phone_pattern = r'\b(?:\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b'
 #     email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
@@ -46,7 +187,7 @@
 #     }
 # def extract_name(text):
-#     """Extract candidate name using NER."""
 #     doc = nlp(text)
 #     for ent in doc.ents:
 #         if ent.label_ == 'PERSON':
@@ -54,18 +195,26 @@
 #     return "Not found"
 # def analyze_sections(text):
-#     """Identify resume sections."""
-#     sections = {'experience': [], 'skills': [], 'education': [], 'certifications': []}
 #     section_keywords = {
 #         'experience': ['experience', 'work history', 'employment'],
 #         'skills': ['skills', 'competencies', 'technologies'],
 #         'education': ['education', 'academic background'],
 #         'certifications': ['certifications', 'licenses', 'courses']
 #     }
-#     current_section = None
 #     for line in text.split('\n'):
 #         line_lower = line.strip().lower()
 #         for section, keywords in section_keywords.items():
 #             if any(keyword in line_lower for keyword in keywords):
 #                 current_section = section
@@ -76,69 +225,105 @@
 #     return {k: '\n'.join(v) if v else 'Not found' for k, v in sections.items()}
-# def create_faiss_index(resume_text, jd_text):
-#     """Create FAISS index for similarity retrieval."""
 #     embeddings = similarity_model.encode([resume_text, jd_text])
-#     index = faiss.IndexFlatL2(embeddings.shape[1])
-#     index.add(np.array([embeddings[0]]))  # Add resume embedding
-#     distance, _ = index.search(np.array([embeddings[1]]), 1)
-#     return float((1 - distance[0][0]) * 100)  # Convert to percentage similarity
 # def generate_interview_questions(resume_text, jd_text):
-#     """Generate interview questions."""
-#     prompt = f"Generate 5 technical interview questions based on Resume and Job Description:\n\nResume: {resume_text[:1000]}\nJob Description: {jd_text[:500]}"
 #     response = client.chat.completions.create(
-#         messages=[{"role": "user", "content": prompt}],
-#         model="deepseek-r1-distill-qwen-32b",
 #     )
-#     return response.choices[0].message.content if response.choices else "No questions generated."
 # # Streamlit UI Configuration
 # st.set_page_config(page_title="AI Resume Analyzer", layout="wide")
-# st.title("🧠 AI-Powered Resume Analyzer")
-# st.markdown("Analyze resumes, match job requirements, and generate interview questions instantly!")
-# col1, col2 = st.columns([2, 3])
-# with col1:
-#     uploaded_file = st.file_uploader("Upload Resume (PDF/DOCX/TXT)", type=['pdf', 'docx', 'txt'])
-# with col2:
-#     jd_input = st.text_area("Paste Job Description", height=200)
 # if st.button("Process Resume"):
 #     if uploaded_file and jd_input:
 #         resume_text = extract_text(uploaded_file)
 #         if resume_text:
-#             st.subheader("📌 Candidate Profile")
-#             name = extract_name(resume_text)
-#             contact = extract_contact_info(resume_text)
-#             st.write(f"**Name:** {name}\n\n**Phone:** {contact['phone']}\n\n**Email:** {contact['email']}")
-#             sections = analyze_sections(resume_text)
-#             st.subheader("📂 Resume Sections")
-#             with st.expander("Experience"): st.write(sections['experience'])
-#             with st.expander("Education"): st.write(sections['education'])
-#             with st.expander("Skills"): st.write(sections['skills'])
-#             with st.expander("Certifications"): st.write(sections['certifications'])
-#             st.subheader("📊 Job Compatibility")
-#             match_score = create_faiss_index(resume_text, jd_input)
 #             st.metric("Match Percentage", f"{match_score:.1f}%")
-#             st.progress(match_score / 100)
-#             st.subheader("❓ Suggested Interview Questions")
 #             questions = generate_interview_questions(resume_text, jd_input)
-#             for i, q in enumerate(questions.split("\n")[:5]):
-#                 st.write(f"{i+1}. {q.strip()}")
 #     else:
-#         st.warning("⚠️ Please upload a resume and enter a job description before processing.")
 # st.markdown("---")
-# st.markdown("🔹 Built with Streamlit, FAISS & Groq AI")
-import os
 import streamlit as st
 import re
 import json
@@ -146,8 +331,13 @@ from PyPDF2 import PdfReader
 from docx import Document
 import spacy
 from sentence_transformers import SentenceTransformer, util
 from groq import Groq
 # Initialize NLP components
 try:
     nlp = spacy.load("en_core_web_sm")
@@ -161,8 +351,15 @@ except OSError:
 # Initialize models
 similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
-# Initialize Groq API client
-client = Groq(api_key=os.environ["GROQ_API_KEY"])
 def extract_text(file):
     """Extract text from various file formats"""
@@ -232,7 +429,7 @@ def calculate_similarity(resume_text, jd_text):
 def generate_interview_questions(resume_text, jd_text):
     """Generate interview questions using Groq API"""
-    input_text = f"Generate 5 technical interview questions based on resume and job description.\nResume: {resume_text[:1000]}\nJob Description: {jd_text[:500]}"
     response = client.chat.completions.create(
         messages=[
@@ -243,6 +440,7 @@ def generate_interview_questions(resume_text, jd_text):
     return response.choices[0].message.content if response.choices else "Could not generate questions."
 # Streamlit UI Configuration
 st.set_page_config(page_title="AI Resume Analyzer", layout="wide")
@@ -305,19 +503,42 @@ if st.button("Process Resume"):
                 with edu_col:
                     with st.expander("Education"):
                         st.write(sections['education'])
             # Job Matching Analysis
             st.header("📊 Job Compatibility Analysis")
             match_score = calculate_similarity(resume_text, jd_input)
-            st.metric("Match Percentage", f"{match_score:.1f}%")
             # Interview Questions
             st.header("❓ Suggested Interview Questions")
             questions = generate_interview_questions(resume_text, jd_input)
-            st.write(questions)
     else:
         st.info("👆 Please upload a resume and enter a job description to begin analysis")
 st.markdown("---")
-st.markdown("Built with ♥ using [Streamlit](https://streamlit.io) | [Hugging Face](https://huggingface.co) | [Spacy](https://spacy.io) | FAISS | Groq AI")

+# # import streamlit as st
+# # import re
+# # import os
+# # import faiss
+# # import numpy as np
+# # from PyPDF2 import PdfReader
+# # from docx import Document
+# # import spacy
+# # from sentence_transformers import SentenceTransformer
+# # from groq import Groq
+# # # Load NLP Model
+# # try:
+# #     nlp = spacy.load("en_core_web_sm")
+# # except OSError:
+# #     from spacy.cli import download
+# #     download("en_core_web_sm")
+# #     nlp = spacy.load("en_core_web_sm")
+# # # Load Sentence Transformer Model
+# # similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
+# # # Initialize Groq API Client
+# # client = Groq(api_key=os.environ["GROQ_API_KEY"])
+# # def extract_text(file):
+# #     """Extract text from PDF, DOCX, or TXT file."""
+# #     if file.name.endswith('.pdf'):
+# #         reader = PdfReader(file)
+# #         return " ".join([page.extract_text() for page in reader.pages if page.extract_text()])
+# #     elif file.name.endswith('.docx'):
+# #         doc = Document(file)
+# #         return " ".join([para.text for para in doc.paragraphs])
+# #     elif file.name.endswith('.txt'):
+# #         return file.read().decode()
+# #     return ""
+# # def extract_contact_info(text):
+# #     """Extract phone numbers and emails."""
+# #     phone_pattern = r'\b(?:\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b'
+# #     email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
+# #     return {
+# #         'phone': re.findall(phone_pattern, text)[0] if re.findall(phone_pattern, text) else 'Not found',
+# #         'email': re.findall(email_pattern, text)[0] if re.findall(email_pattern, text) else 'Not found'
+# #     }
+# # def extract_name(text):
+# #     """Extract candidate name using NER."""
+# #     doc = nlp(text)
+# #     for ent in doc.ents:
+# #         if ent.label_ == 'PERSON':
+# #             return ent.text
+# #     return "Not found"
+# # def analyze_sections(text):
+# #     """Identify resume sections."""
+# #     sections = {'experience': [], 'skills': [], 'education': [], 'certifications': []}
+# #     section_keywords = {
+# #         'experience': ['experience', 'work history', 'employment'],
+# #         'skills': ['skills', 'competencies', 'technologies'],
+# #         'education': ['education', 'academic background'],
+# #         'certifications': ['certifications', 'licenses', 'courses']
+# #     }
+# #     current_section = None
+# #     for line in text.split('\n'):
+# #         line_lower = line.strip().lower()
+# #         for section, keywords in section_keywords.items():
+# #             if any(keyword in line_lower for keyword in keywords):
+# #                 current_section = section
+# #                 break
+# #         else:
+# #             if current_section and line.strip():
+# #                 sections[current_section].append(line.strip())
+# #     return {k: '\n'.join(v) if v else 'Not found' for k, v in sections.items()}
+# # def create_faiss_index(resume_text, jd_text):
+# #     """Create FAISS index for similarity retrieval."""
+# #     embeddings = similarity_model.encode([resume_text, jd_text])
+# #     index = faiss.IndexFlatL2(embeddings.shape[1])
+# #     index.add(np.array([embeddings[0]]))  # Add resume embedding
+# #     distance, _ = index.search(np.array([embeddings[1]]), 1)
+# #     return float((1 - distance[0][0]) * 100)  # Convert to percentage similarity
+# # def generate_interview_questions(resume_text, jd_text):
+# #     """Generate interview questions."""
+# #     prompt = f"Generate 5 technical interview questions based on Resume and Job Description:\n\nResume: {resume_text[:1000]}\nJob Description: {jd_text[:500]}"
+# #     response = client.chat.completions.create(
+# #         messages=[{"role": "user", "content": prompt}],
+# #         model="deepseek-r1-distill-qwen-32b",
+# #     )
+# #     return response.choices[0].message.content if response.choices else "No questions generated."
+# # # Streamlit UI Configuration
+# # st.set_page_config(page_title="AI Resume Analyzer", layout="wide")
+# # st.title("🧠 AI-Powered Resume Analyzer")
+# # st.markdown("Analyze resumes, match job requirements, and generate interview questions instantly!")
+# # col1, col2 = st.columns([2, 3])
+# # with col1:
+# #     uploaded_file = st.file_uploader("Upload Resume (PDF/DOCX/TXT)", type=['pdf', 'docx', 'txt'])
+# # with col2:
+# #     jd_input = st.text_area("Paste Job Description", height=200)
+# # if st.button("Process Resume"):
+# #     if uploaded_file and jd_input:
+# #         resume_text = extract_text(uploaded_file)
+# #         if resume_text:
+# #             st.subheader("📌 Candidate Profile")
+# #             name = extract_name(resume_text)
+# #             contact = extract_contact_info(resume_text)
+# #             st.write(f"**Name:** {name}\n\n**Phone:** {contact['phone']}\n\n**Email:** {contact['email']}")
+# #             sections = analyze_sections(resume_text)
+# #             st.subheader("📂 Resume Sections")
+# #             with st.expander("Experience"): st.write(sections['experience'])
+# #             with st.expander("Education"): st.write(sections['education'])
+# #             with st.expander("Skills"): st.write(sections['skills'])
+# #             with st.expander("Certifications"): st.write(sections['certifications'])
+# #             st.subheader("📊 Job Compatibility")
+# #             match_score = create_faiss_index(resume_text, jd_input)
+# #             st.metric("Match Percentage", f"{match_score:.1f}%")
+# #             st.progress(match_score / 100)
+# #             st.subheader("❓ Suggested Interview Questions")
+# #             questions = generate_interview_questions(resume_text, jd_input)
+# #             for i, q in enumerate(questions.split("\n")[:5]):
+# #                 st.write(f"{i+1}. {q.strip()}")
+# #     else:
+# #         st.warning("⚠️ Please upload a resume and enter a job description before processing.")
+# # st.markdown("---")
+# # st.markdown("🔹 Built with Streamlit, FAISS & Groq AI")
+# import os
 # import streamlit as st
 # import re
+# import json
 # from PyPDF2 import PdfReader
 # from docx import Document
 # import spacy
+# from sentence_transformers import SentenceTransformer, util
 # from groq import Groq
+# # Initialize NLP components
 # try:
 #     nlp = spacy.load("en_core_web_sm")
 # except OSError:
 #     from spacy.cli import download
 #     download("en_core_web_sm")
 #     nlp = spacy.load("en_core_web_sm")
+#     # st.error("Please install the SpaCy English model: 'python -m spacy download en_core_web_sm'")
+#     # st.stop()
+# # Initialize models
 # similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
+# # Initialize Groq API client
 # client = Groq(api_key=os.environ["GROQ_API_KEY"])
 # def extract_text(file):
+#     """Extract text from various file formats"""
 #     if file.name.endswith('.pdf'):
 #         reader = PdfReader(file)
+#         return " ".join([page.extract_text() for page in reader.pages])
 #     elif file.name.endswith('.docx'):
 #         doc = Document(file)
 #         return " ".join([para.text for para in doc.paragraphs])
 #     return ""
 # def extract_contact_info(text):
+#     """Extract phone numbers and emails using regex patterns"""
 #     phone_pattern = r'\b(?:\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b'
 #     email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
 #     }
 # def extract_name(text):
+#     """Extract candidate name using SpaCy NER"""
 #     doc = nlp(text)
 #     for ent in doc.ents:
 #         if ent.label_ == 'PERSON':
 #     return "Not found"
 # def analyze_sections(text):
+#     """Parse resume sections using rule-based approach"""
+#     sections = {
+#         'experience': [],
+#         'skills': [],
+#         'education': [],
+#         'certifications': []
+#     }
+#     current_section = None
 #     section_keywords = {
 #         'experience': ['experience', 'work history', 'employment'],
 #         'skills': ['skills', 'competencies', 'technologies'],
 #         'education': ['education', 'academic background'],
 #         'certifications': ['certifications', 'licenses', 'courses']
 #     }
 #     for line in text.split('\n'):
 #         line_lower = line.strip().lower()
+#         # Detect section headers
 #         for section, keywords in section_keywords.items():
 #             if any(keyword in line_lower for keyword in keywords):
 #                 current_section = section
 #     return {k: '\n'.join(v) if v else 'Not found' for k, v in sections.items()}
+# def calculate_similarity(resume_text, jd_text):
+#     """Calculate semantic similarity between resume and JD"""
 #     embeddings = similarity_model.encode([resume_text, jd_text])
+#     return util.pytorch_cos_sim(embeddings[0], embeddings[1]).item() * 100
 # def generate_interview_questions(resume_text, jd_text):
+#     """Generate interview questions using Groq API"""
+#     input_text = f"Generate 5 technical interview questions based on resume and job description.\nResume: {resume_text[:1000]}\nJob Description: {jd_text[:500]}"
 #     response = client.chat.completions.create(
+#         messages=[
+#             {"role": "user", "content": input_text}
+#         ],
+#         model="llama-3.3-70b-versatile",
 #     )
+#     return response.choices[0].message.content if response.choices else "Could not generate questions."
 # # Streamlit UI Configuration
 # st.set_page_config(page_title="AI Resume Analyzer", layout="wide")
+# # Main Application
+# st.title("AI-Powered Resume Analyzer 🧠")
+# st.markdown("""
+#     Upload a candidate's resume and paste the job description to get:
+#     - Candidate profile analysis
+#     - Job requirement matching
+#     - Automated interview questions
+# """)
+# # File Upload and JD Input
+# with st.container():
+#     col1, col2 = st.columns([2, 3])
+#     with col1:
+#         uploaded_file = st.file_uploader(
+#             "Upload Resume (PDF/DOCX/TXT)",
+#             type=['pdf', 'docx', 'txt'],
+#             help="Supported formats: PDF, Word, Text"
+#         )
+#     with col2:
+#         jd_input = st.text_area(
+#             "Paste Job Description",
+#             height=200,
+#             placeholder="Paste the complete job description here..."
+#         )
 # if st.button("Process Resume"):
 #     if uploaded_file and jd_input:
 #         resume_text = extract_text(uploaded_file)
 #         if resume_text:
+#             # Candidate Profile Section
+#             st.header("👤 Candidate Profile")
+#             profile_col1, profile_col2 = st.columns([1, 2])
+#             with profile_col1:
+#                 st.subheader("Basic Information")
+#                 name = extract_name(resume_text)
+#                 contact = extract_contact_info(resume_text)
+#                 st.markdown(f"""
+#                     **Name:** {name}
+#                     **Phone:** {contact['phone']}
+#                     **Email:** {contact['email']}
+#                 """)
+#             with profile_col2:
+#                 st.subheader("Professional Summary")
+#                 sections = analyze_sections(resume_text)
+#                 exp_col, edu_col = st.columns(2)
+#                 with exp_col:
+#                     with st.expander("Work Experience"):
+#                         st.write(sections['experience'])
+#                 with edu_col:
+#                     with st.expander("Education"):
+#                         st.write(sections['education'])
+#             # Job Matching Analysis
+#             st.header("📊 Job Compatibility Analysis")
+#             match_score = calculate_similarity(resume_text, jd_input)
 #             st.metric("Match Percentage", f"{match_score:.1f}%")
+#             # Interview Questions
+#             st.header("❓ Suggested Interview Questions")
 #             questions = generate_interview_questions(resume_text, jd_input)
+#             st.write(questions)
 #     else:
+#         st.info("👆 Please upload a resume and enter a job description to begin analysis")
 # st.markdown("---")
+# st.markdown("Built with ♥ using [Streamlit](https://streamlit.io) | [Hugging Face](https://huggingface.co) | [Spacy](https://spacy.io) | FAISS | Groq AI")
 import streamlit as st
 import re
 import json
 from docx import Document
 import spacy
 from sentence_transformers import SentenceTransformer, util
+from transformers import pipeline, AutoTokenizer, T5ForConditionalGeneration
+import os
 from groq import Groq
+# Initialize Groq API client
+client = Groq(api_key=os.environ["GROQ_API_KEY"])
 # Initialize NLP components
 try:
     nlp = spacy.load("en_core_web_sm")
 # Initialize models
 similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
+# Initialize T5 question generator with proper tokenizer
+tokenizer = AutoTokenizer.from_pretrained("t5-base", use_fast=False)
+model = T5ForConditionalGeneration.from_pretrained("t5-base")
+question_generator = pipeline(
+    "text2text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    framework="pt"
+)
 def extract_text(file):
     """Extract text from various file formats"""
 def generate_interview_questions(resume_text, jd_text):
     """Generate interview questions using Groq API"""
+    input_text = f"Generate 5 technical easy to medium level interview questions based on resume and job description.\nResume: {resume_text[:1000]}\nJob Description: {jd_text[:500]}"
     response = client.chat.completions.create(
         messages=[
     return response.choices[0].message.content if response.choices else "Could not generate questions."
 # Streamlit UI Configuration
 st.set_page_config(page_title="AI Resume Analyzer", layout="wide")
                 with edu_col:
                     with st.expander("Education"):
                         st.write(sections['education'])
+                skills_col, cert_col = st.columns(2)
+                with skills_col:
+                    with st.expander("Skills"):
+                        st.write(sections['skills'])
+                with cert_col:
+                    with st.expander("Certifications"):
+                        st.write(sections['certifications'])
             # Job Matching Analysis
             st.header("📊 Job Compatibility Analysis")
             match_score = calculate_similarity(resume_text, jd_input)
+            col1, col2 = st.columns([1, 3])
+            with col1:
+                st.metric("Match Percentage", f"{match_score:.1f}%")
+            with col2:
+                st.progress(match_score/100)
+                st.caption("Semantic similarity score between resume content and job description")
             # Interview Questions
             st.header("❓ Suggested Interview Questions")
             questions = generate_interview_questions(resume_text, jd_input)
+            if questions:
+                cleaned_questions = questions.replace("\\n", "\n").split("\n")
+                for i, q in enumerate(cleaned_questions[:5]):
+                    st.markdown(f"{i+1}. {q.strip()}")
+            else:
+                st.warning("Could not generate questions. Please try with more detailed inputs.")
     else:
         st.info("👆 Please upload a resume and enter a job description to begin analysis")
+# Footer
 st.markdown("---")
+st.markdown("Built with ♥ using [Streamlit](https://streamlit.io) | [Hugging Face](https://huggingface.co) | [Spacy](https://spacy.io)")