Om-Shandilya committed on
Commit
042558f
·
1 Parent(s): 0ad99b7

Add caching to the GUI

Browse files
gui/app.py CHANGED
@@ -8,6 +8,7 @@ import altair as alt
8
  sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
9
  from src.utils.bulk_loading import bulk_load_raw_resume_files
10
  from src.utils.file_reader import extract_text_from_file
 
11
  from pipelines.core.applicant import run_tfidf_pipeline as applicant_tfidf, run_bert_pipeline as applicant_bert
12
  from pipelines.core.recruiter import rank_with_tfidf as recruiter_tfidf, rank_with_bert as recruiter_bert
13
 
@@ -71,7 +72,7 @@ if app_mode == "Applicant":
71
 
72
  if resume_file:
73
  st.success(f"✅ Successfully uploaded `{resume_file.name}`")
74
- if st.button("Find Top Job Matches", type="primary", use_container_width=True):
75
 
76
  with st.spinner(f"Analyzing resume with {model_choice}..."):
77
 
@@ -84,9 +85,20 @@ if app_mode == "Applicant":
84
  raw_resume_text = extract_text_from_file(tmp_file_path)
85
 
86
  if model_choice == "BERT":
87
- matches, message = applicant_bert(raw_resume_text, top_k=top_k)
 
 
 
 
 
 
88
  else:
89
- matches, message = applicant_tfidf(raw_resume_text, top_k=top_k)
 
 
 
 
 
90
 
91
  if not matches:
92
  st.warning("⚠️ No suitable job matches found.")
@@ -139,7 +151,7 @@ if app_mode == "Recruiter":
139
 
140
  if job_desc_file and resume_files:
141
  st.success(f"✅ Successfully uploaded job description `{job_desc_file.name}` and {len(resume_files)} resumes.")
142
- if st.button("Rank Resumes", type="primary", use_container_width=True):
143
 
144
  with st.spinner(f"Ranking {len(resume_files)} resumes with {model_choice}..."):
145
 
@@ -166,9 +178,17 @@ if app_mode == "Recruiter":
166
 
167
  # 3. Call the appropriate model's pipeline based on the model choice (default to TF-IDF)
168
  if model_choice == "BERT":
169
- ranked_resumes, message = recruiter_bert(raw_job_text, raw_resume_texts, top_k=top_k)
 
 
 
 
170
  else:
171
- ranked_resumes, message = recruiter_tfidf(raw_job_text, raw_resume_texts, top_k=top_k)
 
 
 
 
172
 
173
  # 4. Display results
174
  if not ranked_resumes:
@@ -187,7 +207,7 @@ if app_mode == "Recruiter":
187
  min_value=0,
188
  max_value=1,),
189
  },
190
- use_container_width=True,
191
  hide_index=True,
192
  )
193
 
 
8
  sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
9
  from src.utils.bulk_loading import bulk_load_raw_resume_files
10
  from src.utils.file_reader import extract_text_from_file
11
+ from src.utils.model_loader import get_applicant_matrix, get_applicant_vectorizer, get_bert_model, get_faiss_index, get_recruiter_vectorizer
12
  from pipelines.core.applicant import run_tfidf_pipeline as applicant_tfidf, run_bert_pipeline as applicant_bert
13
  from pipelines.core.recruiter import rank_with_tfidf as recruiter_tfidf, rank_with_bert as recruiter_bert
14
 
 
72
 
73
  if resume_file:
74
  st.success(f"✅ Successfully uploaded `{resume_file.name}`")
75
+ if st.button("Find Top Job Matches", type="primary", width='stretch'):
76
 
77
  with st.spinner(f"Analyzing resume with {model_choice}..."):
78
 
 
85
  raw_resume_text = extract_text_from_file(tmp_file_path)
86
 
87
  if model_choice == "BERT":
88
+ bert_model = get_bert_model()
89
+ faiss_index = get_faiss_index()
90
+ matches, message = applicant_bert(raw_resume_text,
91
+ model=bert_model,
92
+ job_index=faiss_index,
93
+ top_k=top_k,)
94
+
95
  else:
96
+ applicant_vectorizer = get_applicant_vectorizer()
97
+ applicant_matrix = get_applicant_matrix()
98
+ matches, message = applicant_tfidf(raw_resume_text,
99
+ vectorizer=applicant_vectorizer,
100
+ job_matrix=applicant_matrix,
101
+ top_k=top_k)
102
 
103
  if not matches:
104
  st.warning("⚠️ No suitable job matches found.")
 
151
 
152
  if job_desc_file and resume_files:
153
  st.success(f"✅ Successfully uploaded job description `{job_desc_file.name}` and {len(resume_files)} resumes.")
154
+ if st.button("Rank Resumes", type="primary", width='stretch'):
155
 
156
  with st.spinner(f"Ranking {len(resume_files)} resumes with {model_choice}..."):
157
 
 
178
 
179
  # 3. Call the appropriate model's pipeline based on the model choice (default to TF-IDF)
180
  if model_choice == "BERT":
181
+ bert_model = get_bert_model()
182
+ ranked_resumes, message = recruiter_bert(raw_job_text,
183
+ raw_resume_texts,
184
+ model=bert_model,
185
+ top_k=top_k)
186
  else:
187
+ vectorizer = get_recruiter_vectorizer()
188
+ ranked_resumes, message = recruiter_tfidf(raw_job_text,
189
+ raw_resume_texts,
190
+ vectorizer=vectorizer,
191
+ top_k=top_k)
192
 
193
  # 4. Display results
194
  if not ranked_resumes:
 
207
  min_value=0,
208
  max_value=1,),
209
  },
210
+ width='stretch',
211
  hide_index=True,
212
  )
213
 
pipelines/core/applicant.py CHANGED
@@ -13,7 +13,9 @@ def load_job_titles(job_csv_path: str):
13
  raise ValueError("Job CSV must contain a 'title' column.")
14
  return df
15
 
16
- def run_tfidf_pipeline(raw_resume: str,
 
 
17
  local_vectorizer_path=None,
18
  local_matrix_path=None,
19
  repo_id="Om-Shandilya/resume-matcher-tfidf",
@@ -25,6 +27,8 @@ def run_tfidf_pipeline(raw_resume: str,
25
 
26
  Args:
27
  raw_resume (str): Raw text of the resume.
 
 
28
  local_vectorizer_path (str, optional): Local path to TF-IDF vectorizer.
29
  local_matrix_path (str, optional): Local path to TF-IDF matrix.
30
  repo_id (str): Hugging Face repo ID for vectorizer/matrix.
@@ -38,8 +42,11 @@ def run_tfidf_pipeline(raw_resume: str,
38
  """
39
  cleaned_resume = clean_text(raw_resume)
40
 
41
- vectorizer = load_tfidf_vectorizer(local_vectorizer_path, repo_id, vectorizer_filename)
42
- job_matrix = load_tfidf_matrix(local_matrix_path, repo_id, matrix_filename)
 
 
 
43
 
44
  resume_vector = vectorizer.transform([cleaned_resume])
45
  sim_matrix = compute_similarity_matrix(resume_vector, job_matrix)
@@ -68,10 +75,12 @@ def run_tfidf_pipeline(raw_resume: str,
68
  print(f"[{job_idx}] {job_df.iloc[job_idx]['title']} → {score:0.6f}")
69
  print("==============================================")
70
 
71
- return [(job_df.iloc[j]['title'], score) for j, score in matches[0]],message
72
 
73
 
74
- def run_bert_pipeline(raw_resume: str,
 
 
75
  local_bert_path=None,
76
  local_index_path=None,
77
  repo_id="Om-Shandilya/resume-matcher-bert",
@@ -82,6 +91,8 @@ def run_bert_pipeline(raw_resume: str,
82
 
83
  Args:
84
  raw_resume (str): Raw text of the resume.
 
 
85
  local_bert_path (str, optional): Local path to BERT model.
86
  local_index_path (str, optional): Local path to FAISS index.
87
  repo_id (str): Hugging Face repo ID for model/index.
@@ -92,8 +103,11 @@ def run_bert_pipeline(raw_resume: str,
92
  Returns:
93
  List[Tuple[str, float]]: List of (job_title, score) for top_k matches.
94
  """
95
- model = load_bert_model(local_bert_path=local_bert_path, repo_id=repo_id)
96
- job_index = load_faiss_index(local_index_path, repo_id, index_filename)
 
 
 
97
 
98
  cleaned_resume = clean_text_for_bert(raw_resume)
99
  resume_embedding = model.encode([cleaned_resume], normalize_embeddings=True)
 
13
  raise ValueError("Job CSV must contain a 'title' column.")
14
  return df
15
 
16
+ def run_tfidf_pipeline(raw_resume: str, *,
17
+ vectorizer=None,
18
+ job_matrix=None,
19
  local_vectorizer_path=None,
20
  local_matrix_path=None,
21
  repo_id="Om-Shandilya/resume-matcher-tfidf",
 
27
 
28
  Args:
29
  raw_resume (str): Raw text of the resume.
30
+ vectorizer (TfidfVectorizer, optional): Preloaded TF-IDF vectorizer.
31
+ job_matrix (scipy.sparse matrix, optional): Preloaded TF-IDF job matrix.
32
  local_vectorizer_path (str, optional): Local path to TF-IDF vectorizer.
33
  local_matrix_path (str, optional): Local path to TF-IDF matrix.
34
  repo_id (str): Hugging Face repo ID for vectorizer/matrix.
 
42
  """
43
  cleaned_resume = clean_text(raw_resume)
44
 
45
+ if vectorizer is None:
46
+ vectorizer = load_tfidf_vectorizer(local_vectorizer_path, repo_id, vectorizer_filename)
47
+
48
+ if job_matrix is None:
49
+ job_matrix = load_tfidf_matrix(local_matrix_path, repo_id, matrix_filename)
50
 
51
  resume_vector = vectorizer.transform([cleaned_resume])
52
  sim_matrix = compute_similarity_matrix(resume_vector, job_matrix)
 
75
  print(f"[{job_idx}] {job_df.iloc[job_idx]['title']} → {score:0.6f}")
76
  print("==============================================")
77
 
78
+ return [(job_df.iloc[j]['title'], score) for j, score in matches[0]], message
79
 
80
 
81
+ def run_bert_pipeline(raw_resume: str, *,
82
+ model=None,
83
+ job_index=None,
84
  local_bert_path=None,
85
  local_index_path=None,
86
  repo_id="Om-Shandilya/resume-matcher-bert",
 
91
 
92
  Args:
93
  raw_resume (str): Raw text of the resume.
94
+ model (SentenceTransformer, optional): Preloaded BERT model.
95
+ job_index (faiss.Index, optional): Preloaded FAISS index.
96
  local_bert_path (str, optional): Local path to BERT model.
97
  local_index_path (str, optional): Local path to FAISS index.
98
  repo_id (str): Hugging Face repo ID for model/index.
 
103
  Returns:
104
  List[Tuple[str, float]]: List of (job_title, score) for top_k matches.
105
  """
106
+ if model is None:
107
+ model = load_bert_model(local_bert_path=local_bert_path, repo_id=repo_id)
108
+
109
+ if job_index is None:
110
+ job_index = load_faiss_index(local_index_path, repo_id, index_filename)
111
 
112
  cleaned_resume = clean_text_for_bert(raw_resume)
113
  resume_embedding = model.encode([cleaned_resume], normalize_embeddings=True)
pipelines/core/recruiter.py CHANGED
@@ -6,17 +6,32 @@ from src.processing.text_cleaning import clean_text, clean_text_for_bert
6
 
7
 
8
  def rank_with_tfidf(raw_job_text, raw_resume_texts, *,
 
9
  local_vectorizer_path=None,
10
  repo_id="Om-Shandilya/resume-matcher-tfidf",
11
  filename="recruiter/combined_vectorizer.pkl",
12
  top_k=None,
13
  debug=False):
14
- """Rank resumes using TF-IDF similarity."""
15
- vectorizer = load_tfidf_vectorizer(
16
- local_vectorizer_path=local_vectorizer_path,
17
- repo_id=repo_id,
18
- filename=filename
19
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  cleaned_job_text = clean_text(raw_job_text)
22
  job_vector = vectorizer.transform([cleaned_job_text])
@@ -56,12 +71,28 @@ def rank_with_tfidf(raw_job_text, raw_resume_texts, *,
56
 
57
 
58
  def rank_with_bert(raw_job_text, raw_resume_texts, *,
 
59
  local_bert_path=None,
60
  repo_id="Om-Shandilya/resume-matcher-bert",
61
  top_k=None,
62
  debug=False):
63
- """Rank resumes using BERT embeddings."""
64
- model = load_bert_model(local_bert_path=local_bert_path, repo_id=repo_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
  cleaned_job_text = clean_text_for_bert(raw_job_text)
67
  job_embedding = model.encode([cleaned_job_text], normalize_embeddings=True)
 
6
 
7
 
8
  def rank_with_tfidf(raw_job_text, raw_resume_texts, *,
9
+ vectorizer=None,
10
  local_vectorizer_path=None,
11
  repo_id="Om-Shandilya/resume-matcher-tfidf",
12
  filename="recruiter/combined_vectorizer.pkl",
13
  top_k=None,
14
  debug=False):
15
+ """Rank resumes using TF-IDF similarity.
16
+
17
+ Args:
18
+ raw_job_text (str): Raw text of the job description.
19
+ raw_resume_texts (dict): Dictionary of resume filenames and their raw texts.
20
+ vectorizer (TfidfVectorizer, optional): Preloaded TF-IDF vectorizer.
21
+ local_vectorizer_path (str, optional): Local path to TF-IDF vectorizer.
22
+ repo_id (str): Hugging Face repo ID for vectorizer.
23
+ filename (str): Filename of the vectorizer in the repo.
24
+ top_k (int, optional): Number of top matches to return. If None, return all.
25
+ debug (bool, optional): Print raw similarity scores for both and cleaned resume.
26
+
27
+ Returns:
28
+ List[Tuple[str, float]]: List of (resume_filename, score) for top_k matches, and a message.
29
+ """
30
+
31
+ if vectorizer is None:
32
+ vectorizer = load_tfidf_vectorizer(local_vectorizer_path=local_vectorizer_path,
33
+ repo_id=repo_id,
34
+ filename=filename)
35
 
36
  cleaned_job_text = clean_text(raw_job_text)
37
  job_vector = vectorizer.transform([cleaned_job_text])
 
71
 
72
 
73
  def rank_with_bert(raw_job_text, raw_resume_texts, *,
74
+ model=None,
75
  local_bert_path=None,
76
  repo_id="Om-Shandilya/resume-matcher-bert",
77
  top_k=None,
78
  debug=False):
79
+ """Rank resumes using BERT embeddings.
80
+
81
+ Args:
82
+ raw_job_text (str): Raw text of the job description.
83
+ raw_resume_texts (dict): Dictionary of resume filenames and their raw text.
84
+ model (SentenceTransformer, optional): Preloaded BERT model.
85
+ local_bert_path (str, optional): Local path to BERT model.
86
+ repo_id (str): Hugging Face repo ID for model.
87
+ top_k (int, optional): Maximum number of matches to show. If None, show all.
88
+ debug (bool, optional): Print raw similarity scores.
89
+
90
+ Returns:
91
+ List[Tuple[str, float]]: List of (resume_filename, score) for top_k matches, and a message.
92
+ """
93
+
94
+ if model is None:
95
+ model = load_bert_model(local_bert_path=local_bert_path, repo_id=repo_id)
96
 
97
  cleaned_job_text = clean_text_for_bert(raw_job_text)
98
  job_embedding = model.encode([cleaned_job_text], normalize_embeddings=True)
src/feature_engg/bert_embedding_data.py CHANGED
@@ -7,11 +7,10 @@ import torch
7
  from faiss import read_index
8
  from typing import Optional
9
  from sentence_transformers import SentenceTransformer, models
10
- from transformers import AutoTokenizer, AutoModel
11
  from huggingface_hub import hf_hub_download
12
 
13
 
14
- def get_bert_model(model_name: str,
15
  device: str = None):
16
  """
17
  Loads a BERT-based sentence transformer model for embeddings.
@@ -84,7 +83,7 @@ def bert_embed_text(df: pd.DataFrame,
84
  df[text_column] = df[text_column].fillna("")
85
 
86
  if model is None:
87
- model = get_bert_model()
88
 
89
  embeddings = model.encode(
90
  df[text_column].tolist(),
 
7
  from faiss import read_index
8
  from typing import Optional
9
  from sentence_transformers import SentenceTransformer, models
 
10
  from huggingface_hub import hf_hub_download
11
 
12
 
13
+ def create_bert_model(model_name: str,
14
  device: str = None):
15
  """
16
  Loads a BERT-based sentence transformer model for embeddings.
 
83
  df[text_column] = df[text_column].fillna("")
84
 
85
  if model is None:
86
+ model = create_bert_model()
87
 
88
  embeddings = model.encode(
89
  df[text_column].tolist(),
src/utils/model_loader.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from src.feature_engg.bert_embedding_data import load_bert_model
3
+ from src.feature_engg.tfidf_vectorizing_data import load_tfidf_vectorizer, load_tfidf_matrix
4
+ from src.feature_engg.bert_embedding_data import load_faiss_index
5
+
6
+ # Using Streamlit's caching mechanism to load models and artifacts only once
7
+ @st.cache_resource
8
+ def get_bert_model():
9
+ """Loads and caches the BERT model."""
10
+
11
+ return load_bert_model(local_bert_path=None,
12
+ repo_id="Om-Shandilya/resume-matcher-bert")
13
+
14
+ @st.cache_resource
15
+ def get_faiss_index():
16
+ """Loads and caches the FAISS index for the applicant view."""
17
+
18
+ return load_faiss_index(local_index_path=None,
19
+ repo_id="Om-Shandilya/resume-matcher-bert",
20
+ filename="applicant/jobs.faiss")
21
+
22
+ @st.cache_resource
23
+ def get_applicant_vectorizer():
24
+ """Loads and caches the TF-IDF vectorizer for the applicant view."""
25
+
26
+ return load_tfidf_vectorizer(local_vectorizer_path=None,
27
+ repo_id="Om-Shandilya/resume-matcher-tfidf",
28
+ filename="applicant/job_vectorizer.pkl")
29
+ @st.cache_resource
30
+ def get_applicant_matrix():
31
+ """Loads and caches the TF-IDF matrix for the applicant view."""
32
+
33
+ return load_tfidf_matrix(local_matrix_path=None,
34
+ repo_id="Om-Shandilya/resume-matcher-tfidf",
35
+ filename="applicant/job_matrix.npz")
36
+
37
+
38
+ @st.cache_resource
39
+ def get_recruiter_vectorizer():
40
+ """Loads and caches the TF-IDF vectorizer for the recruiter view."""
41
+
42
+ return load_tfidf_vectorizer(local_vectorizer_path=None,
43
+ repo_id="Om-Shandilya/resume-matcher-tfidf",
44
+ filename="recruiter/combined_vectorizer.pkl")