"""Streamlit app that ranks uploaded resumes against a job description.

The module wires together a spaCy pipeline (entity extraction), a
sentence-transformers encoder (semantic similarity), an optional
text-generation pipeline (free-text feedback) and MLflow tracking.
"""
import logging

import mlflow
import numpy as np
import pandas as pd
import spacy
import streamlit as st
import torch
from pyhealth.metrics import binary_metrics
from sentence_transformers import SentenceTransformer
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer, StandardScaler
from transformers import pipeline

from system_monitor import SystemMonitor  # Custom AIOPS module


def _encode_texts(texts, model):
    """Encode an iterable of strings into a 2-D embedding array with *model*.

    Kept at module level (instead of a lambda or bound method) so that a
    ``FunctionTransformer`` wrapping it does not drag the whole processor
    object along when the fitted pipeline is serialized.
    """
    return model.encode(list(texts))


class AdvancedResumeProcessor:
    """Bundle the NLP models and helpers used to score resumes."""

    def __init__(self):
        self.nlp = spacy.load("en_core_web_trf")
        self.sentence_model = SentenceTransformer('all-mpnet-base-v2')
        self.system_monitor = SystemMonitor()
        self.logger = logging.getLogger('mlops')
        # The text-generation model is only loaded when CUDA is available;
        # otherwise ``llm`` is None and callers must handle that.
        self.llm = (
            pipeline('text-generation', model='gpt2-xl')
            if torch.cuda.is_available()
            else None
        )

        # MLOps setup
        mlflow.set_tracking_uri("http://localhost:5000")
        # Reuse an already-active run: calling ``start_run()`` while a run is
        # active raises, which happens as soon as a second processor is built
        # in the same process (e.g. on a Streamlit rerun).
        self.experiment = mlflow.active_run() or mlflow.start_run()

    def extract_text(self, uploaded_file):
        """Return the textual content of an uploaded file.

        Args:
            uploaded_file: A file-like object exposing ``name`` and either
                ``getvalue()`` or ``read()`` (as Streamlit uploads do).

        Returns:
            The decoded text. Undecodable bytes are replaced rather than
            raising, so a slightly corrupt file still yields usable text.

        Raises:
            ValueError: If the file is a PDF. This module imports no PDF
                parser, so PDF content cannot be extracted here.
        """
        filename = getattr(uploaded_file, "name", "") or ""
        if filename.lower().endswith(".pdf"):
            raise ValueError(
                f"PDF text extraction is not available for '{filename}'; "
                "upload a TXT file instead"
            )
        # ``getvalue`` ignores the current stream position, so repeated calls
        # on the same upload keep returning the full content.
        if hasattr(uploaded_file, "getvalue"):
            raw = uploaded_file.getvalue()
        else:
            raw = uploaded_file.read()
        if isinstance(raw, bytes):
            return raw.decode("utf-8", errors="replace")
        return raw

    def _extract_entities(self, text):
        """Enhanced entity extraction with custom categories.

        Returns:
            A dict with the keys ``skills``, ``education`` and ``experience``,
            each mapping to the list of entity texts carrying the matching
            label.
        """
        # NOTE(review): the stock ``en_core_web_trf`` label set does not
        # appear to include SKILL / DEGREE / EXPERIENCE; confirm that a custom
        # NER component or entity ruler is configured, otherwise every list
        # below stays empty.
        doc = self.nlp(text)
        return {
            'skills': [ent.text for ent in doc.ents if ent.label_ == 'SKILL'],
            'education': [ent.text for ent in doc.ents if ent.label_ == 'DEGREE'],
            'experience': [ent.text for ent in doc.ents if ent.label_ == 'EXPERIENCE'],
        }

    def _generate_features(self, jd_entities, resume_text, jd_text=None):
        """Generate multi-modal features for one resume.

        Args:
            jd_entities: Entity dict of the job description, as returned by
                ``_extract_entities``.
            resume_text: Raw text of the resume.
            jd_text: Raw text of the job description. When omitted, the text
                is approximated by joining the job-description entities, so
                existing two-argument callers keep working.

        Returns:
            A dict with ``semantic_similarity`` (float), ``skill_match``
            (number of shared skill strings) and ``education_match`` (1 when
            any job-description degree appears in the resume, else 0).
        """
        resume_entities = self._extract_entities(resume_text)

        if jd_text is None:
            jd_text = " ".join(
                term for terms in jd_entities.values() for term in terms
            )

        # Semantic similarity between the job description and the resume.
        # (Previously both embeddings were computed from the resume, which
        # made the score a constant 1.0.)
        if jd_text.strip() and resume_text.strip():
            jd_embed, resume_embed = self.sentence_model.encode(
                [jd_text, resume_text]
            )
            semantic_sim = float(
                cosine_similarity([jd_embed], [resume_embed])[0][0]
            )
        else:
            # Nothing to compare against; report no similarity.
            semantic_sim = 0.0

        # Entity matching scores
        skill_match = len(
            set(jd_entities['skills']) & set(resume_entities['skills'])
        )
        education_match = int(
            any(deg in resume_entities['education']
                for deg in jd_entities['education'])
        )

        return {
            'semantic_similarity': semantic_sim,
            'skill_match': skill_match,
            'education_match': education_match,
        }

    def train_model(self, X, y):
        """MLOps enabled training pipeline.

        Args:
            X: Tabular input with a ``resume_text`` column
                (presumably a pandas DataFrame; confirm against callers).
            y: Regression targets aligned with ``X``.

        Returns:
            The fitted sklearn ``Pipeline``; it is also logged to MLflow
            under the artifact path ``"model"``.
        """
        # ``__init__`` may have left a run active; starting a second one
        # without ``nested=True`` raises.
        with mlflow.start_run(nested=mlflow.active_run() is not None):
            # A SentenceTransformer has no fit/transform, so it cannot be a
            # pipeline step by itself; wrap its ``encode`` call instead and
            # reuse the encoder that is already loaded.
            embedder = FunctionTransformer(
                _encode_texts, kw_args={'model': self.sentence_model}
            )
            preprocessor = ColumnTransformer([
                ('text', Pipeline([
                    ('embed', embedder),
                    ('scaler', StandardScaler()),
                ]), 'resume_text'),
            ])

            model = Pipeline([
                ('preproc', preprocessor),
                ('regressor', GradientBoostingRegressor()),
            ])

            model.fit(X, y)
            mlflow.sklearn.log_model(model, "model")
            return model


class MLOpsDashboard:
    """In-memory store of metric histories shown by the dashboard."""

    def __init__(self):
        self.metrics = {
            'model_performance': [],
            'system_health': [],
            'data_quality': [],
        }

    def update_metrics(self, new_metrics):
        """Append each value in *new_metrics* to the history of its key.

        Unknown metric names start a new history instead of raising.
        """
        for k, v in new_metrics.items():
            self.metrics.setdefault(k, []).append(v)


@st.cache_resource
def _load_processor():
    """Build the processor once per server process.

    Streamlit re-executes the script on every interaction; without caching,
    all models would be reloaded each time.
    """
    return AdvancedResumeProcessor()


def _llm_feedback(llm, jd_text, resume_text):
    """Return generated feedback text, or a placeholder when no LLM is loaded."""
    if not llm:
        return "LLM unavailable"
    prompt = (
        f"Compare this resume to the job description: {jd_text[:1000]}...\n"
        f"RESUME: {resume_text[:1000]}"
    )
    # ``return_full_text=False`` keeps the prompt out of the result; otherwise
    # the first 200 characters shown to the user are just the prompt itself.
    outputs = llm(prompt, max_new_tokens=128, return_full_text=False)
    return outputs[0]['generated_text']


def main():
    """Render the Streamlit UI and run the ranking workflow."""
    st.set_page_config(page_title="Enterprise Resume Ranker", layout="wide")
    st.title("🚀 Next-Gen Resume Ranking System with AIOPs/MLOps")

    processor = _load_processor()
    # Keep the dashboard in session state so metric history survives reruns.
    if "dashboard" not in st.session_state:
        st.session_state["dashboard"] = MLOpsDashboard()
    dashboard = st.session_state["dashboard"]

    with st.sidebar:
        st.header("AIOPs Dashboard")
        processor.system_monitor.display_metrics()
        st.metric("Current Load", f"{processor.system_monitor.cpu_usage}% CPU")

        st.header("MLOps Controls")
        retrain = st.button("Retrain Production Model")
        if retrain:
            with st.spinner("Retraining model..."):
                # Add retraining logic here
                st.success("Model updated in production!")

    main_col1, main_col2 = st.columns([3, 2])

    # Stays None until a ranking has been produced in this run.
    df = None

    with main_col1:
        st.header("Upload Files")
        jd_file = st.file_uploader("Job Description (TXT/PDF)", type=["txt", "pdf"])
        resume_files = st.file_uploader(
            "Resumes (PDF/TXT)", type=["pdf", "txt"], accept_multiple_files=True
        )

        if jd_file and resume_files:
            try:
                # Process job description
                jd_text = processor.extract_text(jd_file)
                jd_entities = processor._extract_entities(jd_text)

                # Process resumes and generate features
                results = []
                for file in resume_files:
                    try:
                        resume_text = processor.extract_text(file)
                    except ValueError as err:
                        # One unreadable resume should not abort the batch.
                        st.warning(f"Skipping {file.name}: {err}")
                        continue

                    features = processor._generate_features(
                        jd_entities, resume_text, jd_text
                    )

                    # Generate LLM feedback
                    llm_feedback = _llm_feedback(
                        processor.llm, jd_text, resume_text
                    )

                    results.append({
                        "Filename": file.name,
                        **features,
                        "LLM Feedback": llm_feedback[:200] + "...",
                    })

                if not results:
                    st.warning("None of the uploaded resumes could be processed.")
                else:
                    # Display results
                    df = pd.DataFrame(results).sort_values(
                        "semantic_similarity", ascending=False
                    )
                    st.subheader("Ranking Results with Explainability")
                    st.dataframe(
                        df,
                        column_config={
                            "semantic_similarity": "Semantic Match",
                            "skill_match": "Skill Matches",
                            "education_match": "Education Match",
                        },
                        use_container_width=True,
                    )

                    # MLOps logging
                    dashboard.update_metrics({
                        'model_performance': df['semantic_similarity'].mean(),
                        'data_quality': len(resume_files),
                    })

            except Exception as e:
                # Top-level UI boundary: log with traceback and tell the user.
                processor.logger.exception("Processing error: %s", e)
                st.error(f"System error: {str(e)}")

    with main_col2:
        st.header("Model Explainability")
        if df is not None:
            # NOTE(review): ``create_shap_plot`` and ``generate_report`` are
            # not defined in this file. Look them up dynamically so the page
            # degrades gracefully instead of raising NameError.
            shap_plot_fn = globals().get("create_shap_plot")
            report_fn = globals().get("generate_report")

            if callable(shap_plot_fn):
                st.plotly_chart(shap_plot_fn(df))  # Implement SHAP visualization
            else:
                st.info("SHAP visualization is not available.")

            if callable(report_fn):
                st.download_button(
                    "Export Evaluation Report",
                    report_fn(df),
                    file_name="ranking_report.pdf",
                )
            else:
                st.info("Report export is not available.")

        st.header("LLM Feedback Analysis")
        if df is not None:
            st.table(df[["Filename", "LLM Feedback"]].set_index("Filename"))


if __name__ == "__main__":
    main()