"""Job search service for fetching and ranking job posts.""" import json import os from datetime import datetime, timedelta from typing import Any, Dict, List, Optional from urllib.parse import quote import logging import requests from bs4 import BeautifulSoup from ..config import get_settings from .embedding_service import EmbeddingService from .profile_service import ProfileService, UserProfile class JobSearchService: """Service for searching and ranking job posts.""" def __init__(self): self.settings = get_settings() self.embedding_service = EmbeddingService() self.profile_service = ProfileService() self.jobs_cache = {} self._load_cache() def _load_cache(self): """Load jobs cache from file.""" try: if os.path.exists(self.settings.jobs_cache_path): with open(self.settings.jobs_cache_path, "r", encoding="utf-8") as f: self.jobs_cache = json.load(f) except Exception as e: print(f"Error loading jobs cache: {e}") self.jobs_cache = {} def _save_cache(self): """Save jobs cache to file.""" try: os.makedirs(os.path.dirname(self.settings.jobs_cache_path), exist_ok=True) with open(self.settings.jobs_cache_path, "w", encoding="utf-8") as f: json.dump(self.jobs_cache, f, indent=2, default=str) except Exception as e: print(f"Error saving jobs cache: {e}") def _fetch_indeed_jobs( self, query: str, location: str, job_type: str ) -> List[Dict[str, Any]]: """Fetch jobs from Indeed (web scraping - for demo purposes).""" jobs = [] try: # Construct Indeed search URL base_url = "https://www.indeed.com/jobs" params = { "q": query, "l": location, "jt": job_type if job_type in ["fulltime", "parttime", "contract"] else "", "limit": min(self.settings.max_jobs_per_search, 50), } # Build URL url = f"{base_url}?" + "&".join( [f"{k}={quote(str(v))}" for k, v in params.items() if v] ) # Headers to mimic browser request headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" } # Make request with timeout response = requests.get(url, headers=headers, timeout=10) response.raise_for_status() # Parse HTML soup = BeautifulSoup(response.content, "html.parser") # Find job cards (Indeed's structure may change) job_cards = soup.find_all("div", class_="job_seen_beacon") for i, card in enumerate(job_cards[: self.settings.max_jobs_per_search]): try: # Extract job information title_elem = card.find("h2", class_="jobTitle") title = ( title_elem.get_text(strip=True) if title_elem else f"Position {i + 1}" ) company_elem = card.find("span", class_="companyName") company = ( company_elem.get_text(strip=True) if company_elem else "Unknown Company" ) location_elem = card.find("div", class_="companyLocation") job_location = ( location_elem.get_text(strip=True) if location_elem else location ) summary_elem = card.find("div", class_="job-snippet") summary = summary_elem.get_text(strip=True) if summary_elem else "" # Try to get job URL link_elem = title_elem.find("a") if title_elem else None job_url = ( f"https://www.indeed.com{link_elem['href']}" if link_elem and link_elem.get("href") else "" ) job = { "id": f"indeed_{i}_{hash(title + company)}", "title": title, "company": company, "location": job_location, "description": summary, "requirements": "", # Would need individual job page scraping "salary": "", # Would need more detailed scraping "url": job_url, "posted_date": datetime.now().isoformat(), "job_type": job_type or "full-time", "source": "indeed", } jobs.append(job) except Exception as e: print(f"Error parsing job card {i}: {e}") continue except Exception as e: print(f"Error fetching Indeed jobs: {e}") return jobs def _fetch_sample_jobs( self, query: str, location: str, job_type: str ) -> List[Dict[str, Any]]: """Generate sample jobs for demo purposes.""" sample_jobs = [ { "id": "sample_1", "title": f"Senior {query} Developer", "company": "TechCorp Solutions", "location": location or "Remote", "description": f"We are looking for an experienced {query} developer to join our dynamic team. You will work on cutting-edge projects using modern technologies and best practices.", "requirements": f"5+ years experience with {query}, strong problem-solving skills, team collaboration", "salary": "$80,000 - $120,000", "url": "https://example.com/job1", "posted_date": datetime.now().isoformat(), "job_type": job_type or "full-time", "source": "sample", }, { "id": "sample_2", "title": f"{query} Engineer", "company": "InnovateLabs", "location": location or "New York, NY", "description": f"Join our team as a {query} engineer and help build the next generation of applications. Great opportunity for growth and learning.", "requirements": f"3+ years {query} experience, bachelor's degree preferred, excellent communication skills", "salary": "$70,000 - $95,000", "url": "https://example.com/job2", "posted_date": (datetime.now() - timedelta(days=1)).isoformat(), "job_type": job_type or "full-time", "source": "sample", }, { "id": "sample_3", "title": f"Junior {query} Specialist", "company": "StartupXYZ", "location": location or "San Francisco, CA", "description": f"Great entry-level opportunity for {query} enthusiasts. We offer mentorship, training, and a collaborative environment.", "requirements": f"1-2 years {query} experience, willingness to learn, passion for technology", "salary": "$55,000 - $75,000", "url": "https://example.com/job3", "posted_date": (datetime.now() - timedelta(days=2)).isoformat(), "job_type": job_type or "full-time", "source": "sample", }, ] return sample_jobs def _fetch_remotive_jobs( self, query: str, location: str, job_type: str ) -> List[Dict[str, Any]]: """Fetch jobs from the Remotive API.""" jobs = [] try: params = {"search": query} if location: params["location"] = location if job_type: params["job_type"] = job_type print(f"🔍 Remotive Debug - URL: {self.settings.remotive_api_url}") print(f"🔍 Remotive Debug - Params: {params}") response = requests.get( self.settings.remotive_api_url, params=params, timeout=30 ) response.raise_for_status() data = response.json() remotive_jobs = data.get("jobs", []) print(f"🔍 Remotive Debug - Response: {len(remotive_jobs)} jobs found") for item in remotive_jobs: job = { "id": f"remotive_{item.get('id')}", "title": item.get("title", ""), "company": item.get("company_name", ""), "location": item.get("candidate_required_location", ""), "description": item.get("description", ""), "requirements": "", "salary": item.get("salary", ""), "url": item.get("url", ""), "posted_date": item.get("publication_date", ""), "job_type": job_type or item.get("job_type", ""), "source": "remotive", } jobs.append(job) print(f"🔍 Remotive Debug - Processed: {len(jobs)} jobs") except Exception as e: print(f"🔍 Remotive Debug - Error: {str(e)}") logging.error("Error fetching Remotive jobs: %s", e) return jobs def _fetch_adzuna_jobs( self, query: str, location: str, job_type: str ) -> List[Dict[str, Any]]: """Fetch jobs from the Adzuna API.""" jobs = [] if not self.settings.adzuna_app_id or not self.settings.adzuna_app_key: return jobs try: base_url = f"https://api.adzuna.com/v1/api/jobs/{self.settings.adzuna_country}/search/1" params = { "app_id": self.settings.adzuna_app_id, "app_key": self.settings.adzuna_app_key, "what": query, "where": location, "results_per_page": min(self.settings.max_jobs_per_search, 50), "content-type": "application/json", } response = requests.get(base_url, params=params, timeout=30) response.raise_for_status() data = response.json() for item in data.get("results", []): job = { "id": f"adzuna_{item.get('id')}", "title": item.get("title", ""), "company": item.get("company", {}).get("display_name", ""), "location": item.get("location", {}).get("display_name", ""), "description": item.get("description", ""), "requirements": item.get("description", ""), "salary": item.get("salary_is_predicted", ""), "url": item.get("redirect_url", ""), "posted_date": item.get("created", ""), "job_type": job_type or item.get("contract_time", ""), "source": "adzuna", } jobs.append(job) except Exception as e: logging.error("Error fetching Adzuna jobs: %s", e) return jobs def search_jobs( self, user_id: str, query: str = "", location: str = "", job_type: str = "" ) -> Dict[str, Any]: """ Search for jobs and rank them by relevance to user profile. Args: user_id: User identifier for personalization query: Job search query/keywords location: Preferred job location job_type: Type of job (full-time, contract, remote, etc.) Returns: Dict with ranked job listings and fit scores """ try: # Get user profile profile = self.profile_service.get_profile(user_id) if not profile: return { "success": False, "message": "User profile not found. Please create a profile first.", } # Create cache key cache_key = f"{query}_{location}_{job_type}" current_time = datetime.now() # Check cache (expire after 1 hour) if cache_key in self.jobs_cache: cached_data = self.jobs_cache[cache_key] cache_time = datetime.fromisoformat(cached_data["timestamp"]) if current_time - cache_time < timedelta(hours=1): fresh_jobs = cached_data["jobs"] else: fresh_jobs = self._fetch_fresh_jobs(query, location, job_type) self.jobs_cache[cache_key] = { "jobs": fresh_jobs, "timestamp": current_time.isoformat(), } self._save_cache() else: fresh_jobs = self._fetch_fresh_jobs(query, location, job_type) self.jobs_cache[cache_key] = { "jobs": fresh_jobs, "timestamp": current_time.isoformat(), } self._save_cache() if not fresh_jobs: return { "success": True, "jobs": [], "total_found": 0, "message": "No jobs found for the given criteria", "search_params": { "query": query, "location": location, "job_type": job_type, }, } # Add jobs to embedding index self.embedding_service.add_job_embeddings(fresh_jobs) # Create profile text for matching profile_text = ( f"{' '.join(profile.skills)} {profile.career_goals} {profile.resume}" ) # Search for similar jobs similar_jobs = self.embedding_service.search_similar_jobs( profile_text, k=min(len(fresh_jobs), self.settings.max_jobs_per_search) ) # Format results with fit scores ranked_jobs = [] for job_meta, similarity_score in similar_jobs: # Find the full job data full_job = next( (job for job in fresh_jobs if job["id"] == job_meta["job_id"]), None ) if full_job: # Calculate fit percentage (similarity score is cosine similarity) fit_percentage = max(0, min(100, int(similarity_score * 100))) ranked_job = { **full_job, "fit_score": fit_percentage, "match_reasons": self._get_match_reasons( profile, full_job, similarity_score ), } ranked_jobs.append(ranked_job) # Sort by fit score ranked_jobs.sort(key=lambda x: x["fit_score"], reverse=True) return { "success": True, "jobs": ranked_jobs, "total_found": len(ranked_jobs), "search_params": { "query": query, "location": location, "job_type": job_type, }, "user_profile": { "skills_count": len(profile.skills), "location": profile.location, }, } except Exception as e: return {"success": False, "message": f"Error searching jobs: {str(e)}"} def _fetch_fresh_jobs( self, query: str, location: str, job_type: str ) -> List[Dict[str, Any]]: """Fetch fresh jobs from multiple sources.""" all_jobs: List[Dict[str, Any]] = [] # Remotive API remotive_jobs = self._fetch_remotive_jobs(query, location, job_type) all_jobs.extend(remotive_jobs) # Adzuna API adzuna_jobs = self._fetch_adzuna_jobs(query, location, job_type) all_jobs.extend(adzuna_jobs) # Sample jobs as fallback sample_jobs = self._fetch_sample_jobs(query, location, job_type) all_jobs.extend(sample_jobs) return all_jobs def _get_match_reasons( self, profile: UserProfile, job: Dict[str, Any], similarity_score: float ) -> List[str]: """Generate reasons why this job matches the user profile.""" reasons = [] # Check skill matches job_text = f"{job['title']} {job['description']} {job['requirements']}".lower() matching_skills = [ skill for skill in profile.skills if skill.lower() in job_text ] if matching_skills: reasons.append(f"Skills match: {', '.join(matching_skills[:3])}") # Check location preference if profile.location and profile.location.lower() in job["location"].lower(): reasons.append("Location preference match") # Check job type preference if similarity_score > 0.8: reasons.append("High relevance to your background") elif similarity_score > 0.6: reasons.append("Good match for your experience") # Check career goals alignment if any( goal_word in job_text for goal_word in profile.career_goals.lower().split()[:5] ): reasons.append("Aligns with career goals") return reasons[:3] # Limit to top 3 reasons