Om-Shandilya committed on
Commit a9988a0 Β· 1 Parent(s): 042558f

Add RESTful API backend and decoupled frontend

.gitignore CHANGED
@@ -216,4 +216,6 @@ data/raw/*/*csv
 data/saved_plots/
 models/
 __pycache__/
-tests/
+tests/
+download_nltk_data.py
+nltk_data/
backend/main.py ADDED
@@ -0,0 +1,120 @@
+from fastapi import FastAPI, HTTPException
+from contextlib import asynccontextmanager
+import sys
+import os
+
+# Add the project root to sys.path so the backend can find the 'src' and 'pipelines' modules.
+PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
+if PROJECT_ROOT not in sys.path:
+    sys.path.append(PROJECT_ROOT)
+
+from backend.models import (ResumeRequest, ApplicantResponse, JobMatch,
+                            RecruiterRequest, RecruiterResponse, ResumeMatch)
+from pipelines.core.applicant import run_bert_pipeline, run_tfidf_pipeline, load_job_titles
+from pipelines.core.recruiter import rank_with_bert, rank_with_tfidf
+from src.feature_engg.bert_embedding_data import load_bert_model, load_faiss_index
+from src.feature_engg.tfidf_vectorizing_data import load_tfidf_vectorizer, load_tfidf_matrix
+
+# In-memory storage for models (a dictionary that holds all loaded models):
+ml_models = {}
+
+# Lifespan function to handle startup and shutdown events:
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """This code runs ONCE when the server starts up."""
+
+    print("πŸš€ Server starting up: Loading ML models...")
+
+    # Load applicant models
+    ml_models["bert_model"] = load_bert_model(local_bert_path=None, repo_id="Om-Shandilya/resume-matcher-bert")
+    ml_models["faiss_index"] = load_faiss_index(local_index_path=None, repo_id="Om-Shandilya/resume-matcher-bert", filename="applicant/jobs.faiss")
+    ml_models["applicant_vectorizer"] = load_tfidf_vectorizer(local_vectorizer_path=None, repo_id="Om-Shandilya/resume-matcher-tfidf", filename="applicant/job_vectorizer.pkl")
+    ml_models["applicant_matrix"] = load_tfidf_matrix(local_matrix_path=None, repo_id="Om-Shandilya/resume-matcher-tfidf", filename="applicant/job_matrix.npz")
+
+    # Load recruiter models
+    ml_models["recruiter_vectorizer"] = load_tfidf_vectorizer(local_vectorizer_path=None, repo_id="Om-Shandilya/resume-matcher-tfidf", filename="recruiter/combined_vectorizer.pkl")
+
+    # Load job-title DataFrames
+    ml_models["tfidf_job_df"] = load_job_titles(repo_id='Om-Shandilya/resume-matcher-tfidf', filename='applicant/tfidf_job_titles.csv')
+    ml_models["bert_job_df"] = load_job_titles(repo_id='Om-Shandilya/resume-matcher-bert', filename='applicant/bert_job_titles.csv')
+
+    print("βœ… ML models loaded successfully.")
+
+    yield
+
+    # This code runs once when the server is shutting down.
+    print("Server shutting down: Clearing ML models...")
+    ml_models.clear()
+
+# Initialize the FastAPI app
+app = FastAPI(
+    title="Resume-Job Matcher API",
+    description="An API for matching resumes to jobs and ranking candidates.",
+    lifespan=lifespan
+)
+
+# API endpoints:
+@app.get("/")
+def read_root():
+    return {"status": "Resume Matcher API is running."}
+
+# Applicant-side endpoints:
+@app.post("/applicant/match/bert", response_model=ApplicantResponse)
+async def match_resume_bert(request: ResumeRequest):
+    try:
+        matches, message = run_bert_pipeline(
+            raw_resume=request.raw_text,
+            model=ml_models["bert_model"],
+            job_index=ml_models["faiss_index"],
+            job_df=ml_models["bert_job_df"],
+            top_k=request.top_k)
+
+        response_matches = [JobMatch(job_title=title, match_score=score) for title, score in matches]
+        return ApplicantResponse(matches=response_matches, message=message)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.post("/applicant/match/tf-idf", response_model=ApplicantResponse)
+async def match_resume_tfidf(request: ResumeRequest):
+    try:
+        matches, message = run_tfidf_pipeline(
+            raw_resume=request.raw_text,
+            vectorizer=ml_models["applicant_vectorizer"],
+            job_matrix=ml_models["applicant_matrix"],
+            job_df=ml_models["tfidf_job_df"],
+            top_k=request.top_k)
+
+        response_matches = [JobMatch(job_title=title, match_score=score) for title, score in matches]
+        return ApplicantResponse(matches=response_matches, message=message)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+# Recruiter-side endpoints:
+@app.post("/recruiter/rank/bert", response_model=RecruiterResponse)
+async def rank_resumes_bert(request: RecruiterRequest):
+    try:
+        matches, message = rank_with_bert(
+            raw_job_text=request.raw_job_text,
+            raw_resume_texts=request.raw_resume_texts,
+            model=ml_models["bert_model"],
+            top_k=request.top_k)
+
+        response_matches = [ResumeMatch(resume_filename=fname, match_score=score) for fname, score in matches]
+        return RecruiterResponse(matches=response_matches, message=message)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.post("/recruiter/rank/tf-idf", response_model=RecruiterResponse)
+async def rank_resumes_tfidf(request: RecruiterRequest):
+    try:
+        matches, message = rank_with_tfidf(
+            raw_job_text=request.raw_job_text,
+            raw_resume_texts=request.raw_resume_texts,
+            vectorizer=ml_models["recruiter_vectorizer"],
+            top_k=request.top_k)
+
+        response_matches = [ResumeMatch(resume_filename=fname, match_score=score) for fname, score in matches]
+        return RecruiterResponse(matches=response_matches, message=message)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
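
The endpoint contract above can be smoke-tested once the server is up. A minimal sketch, assuming the backend is served with `uvicorn backend.main:app` on 127.0.0.1:8000 (the address the frontend's API_URL points at); the resume text and top_k value are illustrative:

    # Hypothetical smoke test; not part of this commit.
    import requests

    payload = {"raw_text": "Experienced Python developer with an NLP background...", "top_k": 5}
    response = requests.post("http://127.0.0.1:8000/applicant/match/tf-idf", json=payload, timeout=180)
    response.raise_for_status()

    for match in response.json()["matches"]:  # shaped by ApplicantResponse below
        print(f"{match['job_title']}: {match['match_score']:.3f}")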
backend/models.py ADDED
@@ -0,0 +1,37 @@
+from pydantic import BaseModel
+from typing import List, Dict
+
+
+# Applicant Side Models:
+class ResumeRequest(BaseModel):
+    """The request body for matching a single resume."""
+    raw_text: str
+    top_k: int | None = None
+
+class JobMatch(BaseModel):
+    """Represents a single job match with its score."""
+    job_title: str
+    match_score: float
+
+class ApplicantResponse(BaseModel):
+    """The response body containing job matches and a message."""
+    matches: List[JobMatch]
+    message: str
+
+
+# Recruiter Side Models:
+class RecruiterRequest(BaseModel):
+    """The request body for ranking multiple resumes against a job description."""
+    raw_job_text: str
+    raw_resume_texts: Dict[str, str]  # dict of {filename: raw_resume_text}
+    top_k: int | None = None
+
+class ResumeMatch(BaseModel):
+    """Represents a single ranked resume with its score."""
+    resume_filename: str
+    match_score: float
+
+class RecruiterResponse(BaseModel):
+    """The response body containing ranked resumes and a message."""
+    matches: List[ResumeMatch]
+    message: str
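
These models pin down the JSON contract between the frontend and the backend. A sketch of building a recruiter payload against them (field values are illustrative; `model_dump()` assumes Pydantic v2, with `.dict()` as the v1 equivalent):

    # Illustrative only: exercising the new Pydantic models directly.
    from backend.models import RecruiterRequest

    req = RecruiterRequest(
        raw_job_text="Senior data scientist, NLP focus...",
        raw_resume_texts={"alice.pdf": "Alice's resume text...",
                          "bob.docx": "Bob's resume text..."},
        top_k=2,
    )
    print(req.model_dump())  # Pydantic v2; use req.dict() on v1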
environment.yml CHANGED
@@ -1,38 +1,29 @@
-# Tested on Windows 11 with NVIDIA GPU (CUDA driver 13.0, PyTorch CUDA 12.1 build)
-# Use: conda env create -f environment.yml
-
 name: resume-matcher
 channels:
+  - pytorch
   - conda-forge
   - defaults
 dependencies:
   - python=3.10
   - pip
-  # Core scientific stack
-  - numpy
+  # --- Core Application Dependencies ---
+  - fastapi
+  - uvicorn
+  - streamlit
+  - altair
   - pandas
   - scikit-learn
-  - scipy
   - joblib
-  - tqdm
-  - matplotlib
-  - seaborn
-  - wordcloud
-  # NLP / ML essentials
+  - pytorch
   - faiss-cpu
-  - nltk
-  - statsmodels
   - huggingface_hub
-  # File handling
-  - openpyxl
-  - lxml
-  - pillow
-  - pyyaml
-  - python-docx
   - pdfminer.six
-  # pip-only packages
+  - python-docx
+  - requests
+  - nltk
+  - scipy
+  - anyio
+  # --- Pip-only packages ---
   - pip:
     - sentence-transformers
-    - transformers
-    - accelerate
-    - datasets
+    - python-multipart
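
The removed header comment documented the setup command, which still applies to the new file. Bringing up the decoupled stack would then look roughly like this (the host/port values are assumptions matching the API_URL hardcoded in gui/app.py):

    conda env create -f environment.yml
    conda activate resume-matcher
    uvicorn backend.main:app --host 127.0.0.1 --port 8000   # FastAPI backend
    streamlit run gui/app.py                                # Streamlit frontend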
 
 
gui/app.py CHANGED
@@ -1,29 +1,34 @@
 import streamlit as st
 import os
+import sys
 import tempfile
 import pandas as pd
 import shutil
-import sys
 import altair as alt
-sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
+import requests  # For making API requests to the backend
+
+# Ensure the project root is in sys.path for imports
+PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
+if PROJECT_ROOT not in sys.path:
+    sys.path.append(PROJECT_ROOT)
 from src.utils.bulk_loading import bulk_load_raw_resume_files
 from src.utils.file_reader import extract_text_from_file
-from src.utils.model_loader import get_applicant_matrix, get_applicant_vectorizer, get_bert_model, get_faiss_index, get_recruiter_vectorizer
-from pipelines.core.applicant import run_tfidf_pipeline as applicant_tfidf, run_bert_pipeline as applicant_bert
-from pipelines.core.recruiter import rank_with_tfidf as recruiter_tfidf, rank_with_bert as recruiter_bert
 
-# --- App Configuration ---
+# Backend API URL
+API_URL = "http://127.0.0.1:8000"
+
+# Streamlit app configuration
 st.set_page_config(
     page_title="Resume-Job Matcher",
-    page_icon="πŸ“„",
+    page_icon="πŸ‘¨β€πŸ’Ό",
     layout="wide"
 )
 
-# --- Main App ---
+# Main app title and description
 st.title("🎯 AI-Powered Resume-Job Matcher")
 st.write("---")
 
-# --- Sidebar for Mode Selection ---
+# Sidebar for controls
 with st.sidebar:
     st.header("Controls")
     app_mode = st.radio(
@@ -34,36 +39,28 @@ with st.sidebar:
     model_choice = st.selectbox(
         "Choose the AI Model",
         ("TF-IDF", "BERT"),
-        help="TF-IDF is faster. BERT is more accurate."
+        help="TF-IDF is the faster baseline. BERT is more accurate and semantic."
     )
-
     st.write("---")
-
-    # Add a checkbox to control the 'show all' feature
    show_all = st.checkbox("Show all matches", value=False)
-
    if show_all:
        top_k = None
-        # Disable the slider when 'show_all' is checked for better UX
        st.slider(
-            "Number of matches to show",
+            "Number of matches to show",
            min_value=1, max_value=50, value=5, step=1,
            disabled=True
        )
        st.info("Showing all ranked results.")
    else:
-        # Enable the slider when 'show_all' is unchecked
        top_k = st.slider(
-            "Number of matches to show",
+            "Number of matches to show",
            min_value=1, max_value=50, value=5, step=1,
            disabled=False
        )
 
-
-# --- Applicant View ---
+# Applicant view of the app
 if app_mode == "Applicant":
     st.header("Applicant: Match Your Resume to a Job")
-
     resume_file = st.file_uploader(
         "Upload your resume",
         type=['pdf', 'docx', 'txt'],
@@ -73,150 +70,110 @@ if app_mode == "Applicant":
     if resume_file:
         st.success(f"βœ… Successfully uploaded `{resume_file.name}`")
         if st.button("Find Top Job Matches", type="primary", width='stretch'):
-
-            with st.spinner(f"Analyzing resume with {model_choice}..."):
-
+            with st.spinner("Sending your resume to the AI backend for matching..."):
                 tmp_file_path = None
                 try:
                     with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(resume_file.name)[1]) as tmp_file:
                         tmp_file.write(resume_file.getvalue())
                         tmp_file_path = tmp_file.name
-
                     raw_resume_text = extract_text_from_file(tmp_file_path)
 
-                    if model_choice == "BERT":
-                        bert_model = get_bert_model()
-                        faiss_index = get_faiss_index()
-                        matches, message = applicant_bert(raw_resume_text,
-                                                          model=bert_model,
-                                                          job_index=faiss_index,
-                                                          top_k=top_k)
-
-                    else:
-                        applicant_vectorizer = get_applicant_vectorizer()
-                        applicant_matrix = get_applicant_matrix()
-                        matches, message = applicant_tfidf(raw_resume_text,
                                                           vectorizer=applicant_vectorizer,
-                                                           job_matrix=applicant_matrix,
-                                                           top_k=top_k)
+                    endpoint = f"{API_URL}/applicant/match/{model_choice.lower()}"
+                    payload = {"raw_text": raw_resume_text, "top_k": top_k}
+
+                    response = requests.post(endpoint, json=payload, timeout=180)  # 3-minute timeout
+                    response.raise_for_status()  # Raises HTTPError for bad responses, e.g. 4xx/5xx
+
+                    api_data = response.json()
+                    matches = api_data.get("matches", [])
+                    message = api_data.get("message", "No message from server.")
 
                     if not matches:
                         st.warning("⚠️ No suitable job matches found.")
                     else:
-                        st.subheader(f"Top {len(matches)} Job Matches:")
                         st.info(message)
-
-                        df = pd.DataFrame(matches, columns=["Job Title", "Match Score"])
-
-                        # Sort by 'Match Score' in descending order to show the best matches at the top
-                        df = df.sort_values(by="Match Score", ascending=False).reset_index(drop=True)
+                        st.subheader(f"Top {len(matches)} Job Matches:")
+
+                        df = pd.DataFrame(matches)  # pandas builds the frame from a list of dicts
+                        df = df.sort_values(by="match_score", ascending=False).reset_index(drop=True)
 
                         chart = alt.Chart(df).mark_bar().encode(
-                            y=alt.Y('Job Title', sort='-x', title=None),
-                            x=alt.X('Match Score', axis=None, scale=alt.Scale(domainMin=0)),
-
-                            # Tooltip to reveal the score on hover
-                            tooltip=['Job Title', alt.Tooltip('Match Score', format='.3f')]
-                        ).properties(
-                            title="Relative Job Match Scores"
-                        ).interactive()
-
+                            y=alt.Y('job_title', sort='-x', title=None, axis=alt.Axis(labelLimit=400)),
+                            x=alt.X('match_score', axis=None, scale=alt.Scale(domainMin=0)),
+                            tooltip=['job_title', alt.Tooltip('match_score', format='.3f')]
+                        ).properties(title="Relative Job Match Scores").interactive()
+
                         st.altair_chart(chart, use_container_width=True)
 
+                except requests.exceptions.RequestException as e:
+                    st.error(f"API Error: Could not connect to the backend. Please ensure the backend server is running. Details: {e}")
                 except Exception as e:
                     st.error(f"An error occurred: {e}")
-
                 finally:
                     if tmp_file_path and os.path.exists(tmp_file_path):
                         os.unlink(tmp_file_path)
 
-
-# --- Recruiter View ---
+# Recruiter view of the app
 if app_mode == "Recruiter":
     st.header("Recruiter: Rank Resumes for a Job Description")
-
-    job_desc_file = st.file_uploader(
-        "Upload the job description",
-        type=['pdf', 'docx', 'txt'],
-        help="Upload the job description in PDF, DOCX, or TXT format."
-    )
-
-    resume_files = st.file_uploader(
-        "Upload candidate resumes",
-        type=['pdf', 'docx', 'txt'],
-        accept_multiple_files=True,
-        help="Upload one or more resumes."
-    )
+    job_desc_file = st.file_uploader("Upload the job description", type=['pdf', 'docx', 'txt'])
+    resume_files = st.file_uploader("Upload candidate resumes", type=['pdf', 'docx', 'txt'], accept_multiple_files=True)
 
     if job_desc_file and resume_files:
-        st.success(f"βœ… Successfully uploaded job description `{job_desc_file.name}` and {len(resume_files)} resumes.")
+        st.success(f"βœ… Successfully uploaded job description and {len(resume_files)} resumes.")
         if st.button("Rank Resumes", type="primary", width='stretch'):
-
-            with st.spinner(f"Ranking {len(resume_files)} resumes with {model_choice}..."):
-
-                # Paths for cleanup in the finally block
+            with st.spinner("Sending files to the AI backend for ranking..."):
                 temp_dir = None
                 job_desc_path = None
-
                 try:
-                    # 1. Handle the single job description file
                     with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(job_desc_file.name)[1]) as tmp_file:
                         tmp_file.write(job_desc_file.getvalue())
                         job_desc_path = tmp_file.name
                     raw_job_text = extract_text_from_file(job_desc_path)
 
-                    # 2. Handle multiple resume files by creating a temp directory for bulk loading
                     temp_dir = tempfile.mkdtemp()
                    for resume_file in resume_files:
-                        resume_path = os.path.join(temp_dir, resume_file.name)
-                        with open(resume_path, "wb") as f:
+                        with open(os.path.join(temp_dir, resume_file.name), "wb") as f:
                            f.write(resume_file.getbuffer())
-
-                    # Bulk loading all resumes from the temp directory
                    raw_resume_texts = bulk_load_raw_resume_files(temp_dir)
 
-                    # 3. Call the appropriate model's pipeline based on the model choice (default to TF-IDF)
-                    if model_choice == "BERT":
-                        bert_model = get_bert_model()
-                        ranked_resumes, message = recruiter_bert(raw_job_text,
-                                                                 raw_resume_texts,
-                                                                 model=bert_model,
-                                                                 top_k=top_k)
-                    else:
-                        vectorizer = get_recruiter_vectorizer()
-                        ranked_resumes, message = recruiter_tfidf(raw_job_text,
-                                                                  raw_resume_texts,
-                                                                  vectorizer=vectorizer,
-                                                                  top_k=top_k)
+                    endpoint = f"{API_URL}/recruiter/rank/{model_choice.lower()}"
+                    payload = {
+                        "raw_job_text": raw_job_text,
+                        "raw_resume_texts": raw_resume_texts,
+                        "top_k": top_k
+                    }
+                    response = requests.post(endpoint, json=payload, timeout=300)  # 5-minute timeout
+                    response.raise_for_status()  # Raises HTTPError for bad responses, e.g. 4xx/5xx
+
+                    api_data = response.json()
+                    ranked_resumes = api_data.get("matches", [])
+                    message = api_data.get("message", "No message from server.")
 
-                    # 4. Display results
                     if not ranked_resumes:
                         st.warning("⚠️ Could not rank resumes. Please check the files.")
                     else:
-                        st.subheader(f"Top {len(ranked_resumes)} Ranked Resumes:")
                         st.info(message)
-                        df = pd.DataFrame(ranked_resumes, columns=["Resume", "Match Score"])
-
-                        df["Match Score"] = df["Match Score"].apply(lambda x: min(1.0, x))
+                        st.subheader(f"Top {len(ranked_resumes)} Ranked Resumes:")
+                        df = pd.DataFrame(ranked_resumes)
+                        df["match_score"] = df["match_score"].apply(lambda x: min(1.0, x))
                         st.dataframe(
                             df,
-                            column_config={"Resume": st.column_config.TextColumn("Resume"),
-                                           "Match Score": st.column_config.ProgressColumn("Match Score",
-                                                                                          format="%.2f",
-                                                                                          min_value=0,
-                                                                                          max_value=1,),
-                                           },
-                            width='stretch',
-                            hide_index=True,
+                            column_config={
+                                "resume_filename": st.column_config.TextColumn("Resume"),
+                                "match_score": st.column_config.ProgressColumn(
+                                    "Match Score", format="%.2f", min_value=0, max_value=1
+                                ),
+                            },
+                            hide_index=True,
                         )
 
+                except requests.exceptions.RequestException as e:
+                    st.error(f"API Error: Could not connect to the backend. Please ensure the backend server is running. Details: {e}")
                 except Exception as e:
-                    st.error(f"⚠️An error occurred: {e}")
-
+                    st.error(f"An error occurred: {e}")
                 finally:
-                    # 5. Clean up all temporary files and the directory
                     if job_desc_path and os.path.exists(job_desc_path):
                         os.unlink(job_desc_path)
                     if temp_dir and os.path.exists(temp_dir):
                         shutil.rmtree(temp_dir)
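
The frontend no longer imports any model-loading code; it only consumes JSON shaped by backend/models.py. A sketch of the applicant response it expects (values invented for illustration):

    import pandas as pd

    api_data = {
        "matches": [
            {"job_title": "Data Scientist", "match_score": 0.83},
            {"job_title": "ML Engineer", "match_score": 0.79},
        ],
        "message": "Top matches found.",
    }
    df = pd.DataFrame(api_data["matches"])  # columns: job_title, match_score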
pipelines/core/applicant.py CHANGED
@@ -1,5 +1,7 @@
 import pandas as pd
+import os
 from pathlib import Path
+from huggingface_hub import hf_hub_download
 from src.feature_engg.tfidf_vectorizing_data import load_tfidf_vectorizer, load_tfidf_matrix
 from src.feature_engg.bert_embedding_data import load_bert_model, load_faiss_index
 from src.processing.text_cleaning import clean_text, clean_text_for_bert
@@ -7,8 +9,22 @@ from src.matching.matching_engine import compute_similarity_matrix, top_n_tfidf_
 
 # Defining paths for data files
 PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
-def load_job_titles(job_csv_path: str):
-    df = pd.read_csv(job_csv_path)
+def load_job_titles(local_path=None, repo_id=None, filename=None):
+    """
+    Load job titles, preferring a local path if provided, otherwise
+    downloading from the Hugging Face Hub.
+    """
+    file_path = ""
+    if local_path and os.path.exists(local_path):
+        print(f"πŸ“‚ Using local job titles from {local_path}")
+        file_path = local_path
+    elif repo_id and filename:
+        print(f"🌐 Downloading job titles from Hugging Face Hub ({repo_id}/{filename})")
+        file_path = hf_hub_download(repo_id=repo_id, filename=filename)
+    else:
+        raise ValueError("Must provide either a valid local_path or repo_id and filename.")
+
+    df = pd.read_csv(file_path)
     if "title" not in df.columns:
         raise ValueError("Job CSV must contain a 'title' column.")
     return df
@@ -16,6 +32,7 @@ def load_job_titles(job_csv_path: str):
 def run_tfidf_pipeline(raw_resume: str, *,
                        vectorizer=None,
                        job_matrix=None,
+                       job_df=None,
                        local_vectorizer_path=None,
                        local_matrix_path=None,
                        repo_id="Om-Shandilya/resume-matcher-tfidf",
@@ -28,7 +45,8 @@ def run_tfidf_pipeline(raw_resume: str, *,
     Args:
         raw_resume (str): Raw text of the resume.
         vectorizer (TfidfVectorizer, optional): Preloaded TF-IDF vectorizer.
-        job_matrix (scipy.sparse matrix, optional): Preloaded TF-IDF job matrix
+        job_matrix (scipy.sparse matrix, optional): Preloaded TF-IDF job matrix.
+        job_df (pd.DataFrame, optional): DataFrame of job titles.
         local_vectorizer_path (str, optional): Local path to TF-IDF vectorizer.
         local_matrix_path (str, optional): Local path to TF-IDF matrix.
         repo_id (str): Hugging Face repo ID for vectorizer/matrix.
@@ -51,7 +69,9 @@ def run_tfidf_pipeline(raw_resume: str, *,
     resume_vector = vectorizer.transform([cleaned_resume])
     sim_matrix = compute_similarity_matrix(resume_vector, job_matrix)
 
-    job_df = load_job_titles(PROJECT_ROOT / "data/app_data/tfidf_job_titles.csv")
+    if job_df is None:
+        job_df = load_job_titles(repo_id='Om-Shandilya/resume-matcher-tfidf', filename='applicant/tfidf_job_titles.csv')
+
     total_jobs = len(job_df['title'].unique())
 
     message = ""
@@ -81,6 +101,7 @@ def run_tfidf_pipeline(raw_resume: str, *,
 def run_bert_pipeline(raw_resume: str, *,
                       model=None,
                       job_index=None,
+                      job_df=None,
                       local_bert_path=None,
                       local_index_path=None,
                       repo_id="Om-Shandilya/resume-matcher-bert",
@@ -93,6 +114,7 @@ def run_bert_pipeline(raw_resume: str, *,
         raw_resume (str): Raw text of the resume.
         model (SentenceTransformer, optional): Preloaded BERT model.
         job_index (faiss.Index, optional): Preloaded FAISS index.
+        job_df (pd.DataFrame, optional): DataFrame of job titles.
         local_bert_path (str, optional): Local path to BERT model.
         local_index_path (str, optional): Local path to FAISS index.
         repo_id (str): Hugging Face repo ID for model/index.
@@ -113,7 +135,10 @@ def run_bert_pipeline(raw_resume: str, *,
     resume_embedding = model.encode([cleaned_resume], normalize_embeddings=True)
 
     D, I = job_index.search(resume_embedding, job_index.ntotal)
-    job_df = load_job_titles(PROJECT_ROOT / "data/app_data/bert_job_titles.csv")
+
+    if job_df is None:
+        job_df = load_job_titles(repo_id='Om-Shandilya/resume-matcher-bert', filename='applicant/bert_job_titles.csv')
+
     total_jobs = len(job_df['title'].unique())
 
     message = ""
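
With job_df (and the other artifacts) now injectable, callers can load everything once and reuse it across requests, which is how backend/main.py wires in its lifespan-loaded models. A sketch of the preloaded path (the resume text is illustrative):

    from pipelines.core.applicant import load_job_titles, run_tfidf_pipeline
    from src.feature_engg.tfidf_vectorizing_data import load_tfidf_vectorizer, load_tfidf_matrix

    vectorizer = load_tfidf_vectorizer(local_vectorizer_path=None,
                                       repo_id="Om-Shandilya/resume-matcher-tfidf",
                                       filename="applicant/job_vectorizer.pkl")
    job_matrix = load_tfidf_matrix(local_matrix_path=None,
                                   repo_id="Om-Shandilya/resume-matcher-tfidf",
                                   filename="applicant/job_matrix.npz")
    job_df = load_job_titles(repo_id="Om-Shandilya/resume-matcher-tfidf",
                             filename="applicant/tfidf_job_titles.csv")

    matches, message = run_tfidf_pipeline("Raw resume text...",
                                          vectorizer=vectorizer,
                                          job_matrix=job_matrix,
                                          job_df=job_df,
                                          top_k=5)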
src/feature_engg/bert_embedding_data.py CHANGED
@@ -100,17 +100,30 @@ def bert_embed_text(df: pd.DataFrame,
     return embeddings, model
 
 
-def load_faiss_index(local_index_path: str, repo_id: str, filename: str):
-    """Load FAISS index, preferring local then HF Hub."""
+import faiss
+import os
+from huggingface_hub import hf_hub_download
+
+def load_faiss_index(local_index_path: str, repo_id: str, filename: str, lazy_loading: bool = True):
+    """
+    Load FAISS index, preferring local then HF Hub. Applies lazy loading by default.
+    """
+    index_path = ""
     if local_index_path:
         if not os.path.exists(local_index_path):
             raise FileNotFoundError(f"❌ Local FAISS index not found at {local_index_path}")
-        print(f"πŸ“‚ Loading local FAISS index from {local_index_path}")
-        return faiss.read_index(local_index_path)
-
-    print(f"🌐 Downloading FAISS index from Hugging Face Hub ({repo_id}/{filename})")
-    faiss_path = hf_hub_download(repo_id=repo_id, filename=filename)
-    return faiss.read_index(faiss_path)
+        print(f"πŸ“‚ Using local FAISS index from {local_index_path}")
+        index_path = local_index_path
+    else:
+        print(f"🌐 Downloading FAISS index from Hugging Face Hub ({repo_id}/{filename})")
+        index_path = hf_hub_download(repo_id=repo_id, filename=filename)
+
+    if lazy_loading:
+        print(" -> Loading with lazy loading (MMAP).")
+        return faiss.read_index(index_path, faiss.IO_FLAG_MMAP)
+    else:
+        print(" -> Loading into memory directly.")
+        return faiss.read_index(index_path)
 
 def load_bert_model(local_bert_path: str, repo_id: str='Om-Shandilya/resume-matcher-bert'):
     """
src/processing/text_cleaning.py CHANGED
@@ -1,4 +1,5 @@
 import re
+import os
 import string
 import pandas as pd
 from typing import Optional
@@ -9,6 +10,8 @@ from nltk.corpus import stopwords
 from nltk.stem import WordNetLemmatizer
 import unicodedata
 
+nltk.data.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'nltk_data'))
+
 # Only download necessary NLTK resources if not already present
 nltk_packages = {
     "stopwords": "corpora/stopwords",