"""Second Opinion: compare one resume against several job descriptions.

The app embeds the resume and each job description with a set of
SentenceTransformer models, reports cosine similarities per model together
with aggregate statistics, and can explain a score by listing the most
similar resume/job sentence pairs.

NOTE(review): the copy of this file that was reviewed had lost its line
structure and all inline HTML markup. Sections marked ``NOTE(review)`` below
were reconstructed from the surviving fragments and should be compared with
the version in source control before release.
"""

import hmac
import html
import os
import re
import socket
from datetime import datetime, timezone

import gradio as gr
import nltk
import numpy as np
import pandas as pd
from nltk.tokenize import sent_tokenize
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# nltk.download("all")
# Tokenizer data required by nltk's sentence tokenization.
nltk.download("punkt")
nltk.download("punkt_tab")

###############################
# LOGGING SETUP
###############################
# Store logs inside repo in ./logs/Second_Opinion_Logs.log
log_dir = "./logs"
os.makedirs(log_dir, exist_ok=True)  # ensure folder exists
log_file_path = os.path.join(log_dir, "Second_Opinion_Logs.log")


def _utc_timestamp():
    """Return the current UTC time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    return datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")


def _client_ip(request=None):
    """Return a printable client address for *request*.

    Uses ``request.client.host`` when the request exposes a ``client``
    attribute, otherwise the address this host's name resolves to. Loopback
    addresses are reported as ``"Localhost (127.0.0.1)"`` and any lookup
    failure as ``"Unknown IP"``.
    """
    try:
        if request and hasattr(request, "client"):
            user_ip = request.client.host
        else:
            user_ip = socket.gethostbyname(socket.gethostname())
    except Exception:  # best effort: logging must never break a request
        return "Unknown IP"
    if user_ip in ("127.0.0.1", "::1"):
        return "Localhost (127.0.0.1)"
    return user_ip


def _append_to_log(entry):
    """Append *entry* to the log file (UTF-8); raises ``OSError`` on failure."""
    with open(log_file_path, "a", encoding="utf-8") as log_file:
        log_file.write(entry)


def log_action(action, request=None):
    """Log a major action with the caller's IP address and a UTC timestamp.

    Args:
        action: Free-text description of what happened.
        request: Optional request object; see ``_client_ip`` for how the
            address is derived from it.

    Failures to write are printed and swallowed so that logging problems
    never interrupt the user's request.
    """
    user_ip = _client_ip(request)
    log_entry = f"{_utc_timestamp()} (GMT) - IP: {user_ip} - {action}\n"
    try:
        _append_to_log(log_entry)
        print(f"Log entry added: {log_entry.strip()}")
    except (OSError, ValueError) as e:
        print(f"Error logging action: {e}")


def log_input_text(resume_text, job_descriptions, user_ip="Unknown IP"):
    """Log the full resume and job descriptions entered by the user.

    NOTE(review): this writes complete resumes (personal data) to a plain
    text file inside the repository directory -- confirm that is intended.

    Args:
        resume_text: Resume text as entered.
        job_descriptions: Job description text as entered.
        user_ip: Address to record next to the input.
    """
    timestamp = _utc_timestamp()
    log_entry = (
        f"{timestamp} (GMT) - IP: {user_ip}\n"
        f"--- Resume Input ---\n{resume_text}\n\n"
        f"--- Job Descriptions Input ---\n{job_descriptions}\n"
        "---------------------------------------------\n"
    )
    try:
        _append_to_log(log_entry)
        # The original message started with a mis-encoded emoji; dropped.
        print(f"Logged full user input at {timestamp} from {user_ip}")
    except (OSError, ValueError) as e:
        print(f"Error logging input: {e}")


###############################
# LOAD MODELS
###############################
MODEL_NAMES = (
    "all-MiniLM-L6-v2",
    "paraphrase-MiniLM-L6-v2",
    "multi-qa-MiniLM-L6-cos-v1",
    "all-mpnet-base-v2",
    "paraphrase-mpnet-base-v2",
    "all-distilroberta-v1",
    "paraphrase-albert-small-v2",
    "multi-qa-distilbert-cos-v1",
    "distiluse-base-multilingual-cased-v2",
    "all-MiniLM-L12-v2",
)

# All models are loaded eagerly at import time, keyed by model name.
models = {name: SentenceTransformer(name) for name in MODEL_NAMES}

###############################
# MAIN SIMILARITY FUNCTION
###############################
# A blank line (optionally containing whitespace or "\r") separates jobs.
_JOB_SEPARATOR = re.compile(r"\n\s*\n")


def _split_job_descriptions(job_descriptions):
    """Split pasted text into individual, non-empty job descriptions.

    Shared by ``compute_similarity`` and ``explain_model_scores`` so that a
    job index always refers to the same description in both.
    """
    chunks = _JOB_SEPARATOR.split(job_descriptions or "")
    return [chunk.strip() for chunk in chunks if chunk.strip()]


def compute_similarity(resume_text, job_descriptions):
    """Score a resume against every job description with every model.

    Args:
        resume_text: The resume as plain text.
        job_descriptions: One or more job descriptions separated by blank
            lines.

    Returns:
        ``(reasoning, html)`` on success, where ``reasoning`` names the job
        with the highest average similarity and ``html`` contains the styled
        per-model table, the metric rows and a short footer.
        ``(error_message, None)`` when the input is empty or anything fails.
    """
    try:
        if not resume_text.strip() or not job_descriptions.strip():
            return "Error: Resume and job descriptions cannot be empty.", None

        job_list = _split_job_descriptions(job_descriptions)
        if not job_list:
            return (
                "Error: Provide at least one job description separated by "
                "double line breaks.",
                None,
            )

        job_labels = [f"Job {i}" for i in range(1, len(job_list) + 1)]
        documents = [resume_text, *job_list]

        results = {}
        for model_name, model in models.items():
            embeddings = model.encode(documents)
            # Row 0 is the resume; the remaining rows are the jobs.
            results[model_name] = cosine_similarity(
                embeddings[:1], embeddings[1:]
            ).flatten()

        # Rows: models, columns: jobs.
        model_df = pd.DataFrame(results, index=job_labels).T

        variance = model_df.var(axis=0)
        max_variance = variance.max()
        if max_variance > 0:
            certainty = 1 - variance / max_variance
        else:
            # All models agree exactly (or variance is undefined): avoid the
            # 0/0 division that used to fill this row with NaN.
            certainty = pd.Series(1.0, index=model_df.columns)

        metrics = {
            "Average": model_df.mean(axis=0),
            "Variance": variance,
            "Median": model_df.median(axis=0),
            "Standard Deviation": model_df.std(axis=0),
            "Certainty Score": certainty,
        }
        metrics_df = pd.DataFrame(metrics, index=job_labels).T

        styled_df = model_df.style.background_gradient(
            cmap="Greens", axis=None
        ).to_html()
        styled_df += metrics_df.to_html(header=False)

        best_job = metrics["Average"].idxmax()
        reasoning = (
            f"The best job match is {best_job} based on the highest average "
            "similarity score."
        )

        # NOTE(review): the original markup of this footer was lost; only the
        # text below survived. Restore any missing explanation items.
        description = (
            "<h3>Explanation of the Table:</h3>"
            "<p>If you liked this application, feel free to send your "
            "feedback, suggestions, or adulations to "
            "21f1006368@ds.study.iitm.ac.in.</p>"
        )
        return reasoning, styled_df + description
    except Exception as e:
        return f"Error during computation: {str(e)}", None


###############################
# APPROACH A EXPLANATION
###############################
def explain_scores_by_sentences(model, resume_text, job_text, top_k=3):
    """Return HTML listing the ``top_k`` most similar resume/job sentences.

    Both texts are split into sentences, every sentence is embedded with
    *model*, and all resume/job sentence pairs are ranked by cosine
    similarity.

    NOTE(review): the loop and HTML template of this function were damaged in
    the reviewed copy; they were rebuilt from the surviving placeholders
    (rank, resume sentence, job sentence, score with four decimals).

    Args:
        model: Object with an ``encode(list_of_sentences)`` method.
        resume_text: Resume text.
        job_text: A single job description.
        top_k: Number of sentence pairs to report.

    Returns:
        An HTML string, or a plain message when either text has no sentences.
    """
    resume_sents = sent_tokenize(resume_text)
    job_sents = sent_tokenize(job_text)
    if not resume_sents or not job_sents:
        return "No sentences found in resume or job description."

    sim_matrix = cosine_similarity(
        model.encode(resume_sents), model.encode(job_sents)
    )
    flat_sim = sim_matrix.flatten()
    top_k_indices = np.argsort(flat_sim)[::-1][: max(int(top_k), 0)]

    explanation_parts = []
    for rank, flat_index in enumerate(top_k_indices, start=1):
        resume_idx, job_idx = np.unravel_index(flat_index, sim_matrix.shape)
        # Sentences come from user input, so escape them before embedding
        # them in HTML.
        resume_sentence = html.escape(resume_sents[resume_idx])
        job_sentence = html.escape(job_sents[job_idx])
        score = float(sim_matrix[resume_idx, job_idx])
        explanation_parts.append(
            f"<p><b>#{rank}:</b><br>"
            f"<b>Resume:</b> {resume_sentence}<br>"
            f"<b>Job:</b> {job_sentence}<br>"
            f"<b>Similarity Score:</b> {score:.4f}</p>"
        )
    return "".join(explanation_parts)


def explain_model_scores(model_name, resume_text, job_descriptions, job_index):
    """Explain one model's score for one job as an update for an HTML box.

    NOTE(review): this function was missing from the reviewed copy and was
    rebuilt from its call site (model name, resume, jobs, job index ->
    a single HTML output). Confirm against the original.

    Args:
        model_name: Key into ``models``.
        resume_text: Resume text.
        job_descriptions: All job descriptions, separated by blank lines.
        job_index: 0-based index of the job to explain (may arrive as float).

    Returns:
        ``gr.update`` making the explanation visible with either the
        explanation HTML or an error message.
    """

    def _show(content):
        return gr.update(value=content, visible=True)

    model = models.get(model_name)
    if model is None:
        return _show(f"<p>Unknown model: {html.escape(str(model_name))}</p>")
    if not (resume_text or "").strip():
        return _show("<p>Please paste a resume first.</p>")

    job_list = _split_job_descriptions(job_descriptions)
    if not job_list:
        return _show("<p>Please paste at least one job description.</p>")

    try:
        index = int(job_index)
    except (TypeError, ValueError):
        return _show("<p>Job index must be a whole number.</p>")
    if not 0 <= index < len(job_list):
        return _show(
            f"<p>Job index must be between 0 and {len(job_list) - 1}.</p>"
        )

    try:
        details = explain_scores_by_sentences(
            model, resume_text, job_list[index]
        )
    except Exception as e:
        log_action(f"Error explaining {model_name} for job {index}: {e}")
        return _show(f"<p>Error during explanation: {html.escape(str(e))}</p>")

    heading = f"<h4>{html.escape(model_name)} - Job {index + 1}</h4>"
    return _show(heading + details)


def process_and_display(resume_text, job_descriptions, request: gr.Request):
    """Run the matching step and produce updates for the four output widgets.

    NOTE(review): the first part of this function was missing from the
    reviewed copy; only the final "No results to display." return and the
    error handler survived. The log messages and success path below are a
    reconstruction -- confirm against the original.

    Returns:
        A 4-tuple for (processing message, recommendation HTML, table HTML,
        "show details" button).

    Raises:
        Exception: Any unexpected error is logged and re-raised.
    """
    user_ip = _client_ip(request)
    try:
        log_action("Resume matching requested", request)
        log_input_text(resume_text, job_descriptions, user_ip=user_ip)

        recommendation, table = compute_similarity(
            resume_text, job_descriptions
        )
        recommendation_html = (
            '<div id="centered-recommendation">'
            f"{html.escape(recommendation)}</div>"
        )

        if table is None:
            return (
                gr.update(value="", visible=False),
                recommendation_html,
                "<p>No results to display.</p>",
                gr.update(visible=False),
            )

        log_action("Resume matching completed", request)
        return (
            gr.update(value="", visible=False),
            recommendation_html,
            table,
            gr.update(visible=True),
        )
    except Exception as e:
        log_action(f"Error during process for IP {user_ip}: {str(e)}")
        raise


def show_details(table):
    """Reveal the similarity table, keeping its current HTML content."""
    return gr.update(value=table, visible=True)


# NOTE(review): the fallback keeps the code that was hard-coded in source.
# Prefer setting the INVITE_CODE environment variable in deployment. The gate
# only toggles widget visibility; it is not real access control.
INVITE_CODE = os.environ.get("INVITE_CODE", "INDIAMBA")


def check_invite(user_code):
    """Validate the invite code.

    Returns:
        ``(granted, warning_update, main_ui_update)``: on success the warning
        is hidden and the main UI shown; otherwise the reverse.
    """
    supplied = (user_code or "").strip().encode("utf-8")
    granted = hmac.compare_digest(supplied, INVITE_CODE.encode("utf-8"))
    return (
        granted,
        gr.update(visible=not granted),
        gr.update(visible=granted),
    )


###############################
# CUSTOM CSS
###############################
CUSTOM_CSS = """
body {
    background: linear-gradient(120deg, #E0C3FC 0%, #8EC5FC 100%);
    margin: 0;
    padding: 0;
    font-family: 'Open Sans', sans-serif;
    min-height: 100vh;
}
.gradio-container {
    background-color: transparent !important;
    color: #333333;
}
#centered-recommendation {
    text-align: center;
    font-size: 1.2em;
    margin-top: 20px;
    margin-bottom: 20px;
    color: #2c3e50;
}
button.primary, button.secondary {
    background-color: #3498db !important;
    border: 1px solid #2980b9 !important;
    color: #fff !important;
    border-radius: 4px !important;
}
textarea, input[type='text'], input[type='number'] {
    background-color: #FFFFFF;
    color: #333;
    border-radius: 6px !important;
    border: 1px solid #ccc !important;
    padding: 8px !important;
}
h1, h2, h3 {
    color: #2c3e50;
}
"""

# Mis-encoded emoji in the original headings could not be recovered and were
# dropped; dashes and quotation marks were restored.
INSTRUCTIONS_MD = """
## How to Use This App

1. **Enter Invite Code** to unlock the app.
2. **Paste Resume** in the left box.
3. **Paste Job Descriptions** in the right box.
   - Separate each job description with **two newlines** (press `Enter` twice).
   - Do **not** leave empty lines inside one job description.
4. Click **Match My Resume to Jobs** to see the initial results.
5. Click **Papa Please Preach More** to reveal the full similarity table.
6. Choose a **Job Index** (0 for first job, 1 for second, etc.).
7. Click on any **Explain [Model]** button to see which sentences matched best.

---

*Tip:* Start small \u2014 paste 1 resume and 2\u20133 jobs for faster results.
"""

###############################
# BUILD THE GRADIO INTERFACE
###############################
with gr.Blocks(css=CUSTOM_CSS) as app:
    # State must belong to this Blocks instance to be usable as an output.
    access_granted = gr.State(False)

    gr.Markdown(
        "# Second Opinion with Naval v1.1 \u2013 \u201cMidnight Tears\u201d"
    )
    gr.Markdown(
        "This app requires an invite code to continue. "
        "Ask Naval if you don't have one."
    )

    # Instructions block
    gr.Markdown(INSTRUCTIONS_MD)

    with gr.Row():
        code_input = gr.Textbox(
            label="Enter Invite Code",
            type="password",
            placeholder="Ask Naval for access code",
        )
        access_button = gr.Button("Submit")

    access_warning = gr.Markdown(
        value="Access denied. Please enter the correct invite code.",
        visible=False,
    )

    main_ui = gr.Group(visible=False)
    with main_ui:
        gr.Markdown("### Input Section")
        with gr.Row():
            resume_input = gr.Textbox(
                label="Paste Resume",
                lines=5,
                placeholder="Paste your resume here...",
            )
            job_input = gr.Textbox(
                label="Paste Job Descriptions",
                lines=5,
                placeholder=(
                    "Paste job descriptions here "
                    "(double line break to separate)."
                ),
            )

        gr.Markdown("---")
        gr.Markdown("### Matching Section")
        with gr.Row():
            match_button = gr.Button("Match My Resume to Jobs")
        processing_output = gr.HTML(value="", visible=False)
        with gr.Row():
            recommendation_output = gr.HTML(
                label="Recommendation", visible=True
            )
        with gr.Row():
            table_output = gr.HTML(label="Similarity Table", visible=False)
        with gr.Row():
            nerd_button = gr.Button("Papa Please Preach More", visible=False)

        gr.Markdown("---")
        gr.Markdown("### Explanation Section")
        explanation_output = gr.HTML(label="Model Explanation", visible=False)

        match_button.click(
            process_and_display,
            inputs=[resume_input, job_input],
            outputs=[
                processing_output,
                recommendation_output,
                table_output,
                nerd_button,
            ],
        )
        nerd_button.click(
            show_details, inputs=[table_output], outputs=[table_output]
        )

        with gr.Row():
            job_index_to_explain = gr.Number(
                label="Job Index (0-based)", value=0
            )
        with gr.Row():
            for m_name in models:
                btn = gr.Button(f"Explain {m_name}")
                btn.click(
                    # m=m_name binds the current name; a plain closure would
                    # make every button explain the last model.
                    fn=lambda resume, jobs, idx, m=m_name: (
                        explain_model_scores(m, resume, jobs, idx)
                    ),
                    inputs=[resume_input, job_input, job_index_to_explain],
                    outputs=[explanation_output],
                )

    access_button.click(
        fn=check_invite,
        inputs=[code_input],
        outputs=[access_granted, access_warning, main_ui],
    )

resumepilot = app

if __name__ == "__main__":
    resumepilot.launch()

# NOTE: a fully commented-out earlier prototype used to follow here (a FastAPI
# app with /send-otp, /verify-otp, /send_magic_link, /verify_magic_token,
# /predict, /cohort, profile/theme/log endpoints backed by Dropbox, and a
# Gradio "Resume to JD Matcher" UI). It was dead code and has been removed;
# recover it from version control if it is needed again.