# src/services/job_search_service.py
"""Job search service for fetching and ranking job posts."""
import json
import os
from datetime import datetime, timedelta
from typing import Any, Dict, List
from urllib.parse import quote
import logging
import requests
from bs4 import BeautifulSoup
from ..config import get_settings
from .embedding_service import EmbeddingService
from .profile_service import ProfileService, UserProfile
class JobSearchService:
"""Service for searching and ranking job posts."""
def __init__(self):
self.settings = get_settings()
self.embedding_service = EmbeddingService()
self.profile_service = ProfileService()
self.jobs_cache = {}
self._load_cache()
def _load_cache(self):
"""Load jobs cache from file."""
try:
if os.path.exists(self.settings.jobs_cache_path):
with open(self.settings.jobs_cache_path, "r", encoding="utf-8") as f:
self.jobs_cache = json.load(f)
except Exception as e:
print(f"Error loading jobs cache: {e}")
self.jobs_cache = {}
def _save_cache(self):
"""Save jobs cache to file."""
try:
os.makedirs(os.path.dirname(self.settings.jobs_cache_path), exist_ok=True)
with open(self.settings.jobs_cache_path, "w", encoding="utf-8") as f:
json.dump(self.jobs_cache, f, indent=2, default=str)
except Exception as e:
print(f"Error saving jobs cache: {e}")
def _fetch_indeed_jobs(
self, query: str, location: str, job_type: str
) -> List[Dict[str, Any]]:
"""Fetch jobs from Indeed (web scraping - for demo purposes)."""
jobs = []
try:
# Construct Indeed search URL
base_url = "https://www.indeed.com/jobs"
params = {
"q": query,
"l": location,
"jt": job_type
if job_type in ["fulltime", "parttime", "contract"]
else "",
"limit": min(self.settings.max_jobs_per_search, 50),
}
# Build URL
url = f"{base_url}?" + "&".join(
[f"{k}={quote(str(v))}" for k, v in params.items() if v]
)
# Headers to mimic browser request
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}
# Make request with timeout
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
# Parse HTML
soup = BeautifulSoup(response.content, "html.parser")
# Find job cards (Indeed's structure may change)
job_cards = soup.find_all("div", class_="job_seen_beacon")
for i, card in enumerate(job_cards[: self.settings.max_jobs_per_search]):
try:
# Extract job information
title_elem = card.find("h2", class_="jobTitle")
title = (
title_elem.get_text(strip=True)
if title_elem
else f"Position {i + 1}"
)
company_elem = card.find("span", class_="companyName")
company = (
company_elem.get_text(strip=True)
if company_elem
else "Unknown Company"
)
location_elem = card.find("div", class_="companyLocation")
job_location = (
location_elem.get_text(strip=True)
if location_elem
else location
)
summary_elem = card.find("div", class_="job-snippet")
summary = summary_elem.get_text(strip=True) if summary_elem else ""
# Try to get job URL
link_elem = title_elem.find("a") if title_elem else None
job_url = (
f"https://www.indeed.com{link_elem['href']}"
if link_elem and link_elem.get("href")
else ""
)
job = {
"id": f"indeed_{i}_{hash(title + company)}",
"title": title,
"company": company,
"location": job_location,
"description": summary,
"requirements": "", # Would need individual job page scraping
"salary": "", # Would need more detailed scraping
"url": job_url,
"posted_date": datetime.now().isoformat(),
"job_type": job_type or "full-time",
"source": "indeed",
}
jobs.append(job)
except Exception as e:
print(f"Error parsing job card {i}: {e}")
continue
except Exception as e:
print(f"Error fetching Indeed jobs: {e}")
return jobs
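    # Note: _fetch_indeed_jobs is not currently wired into _fetch_fresh_jobs below.
    # Indeed's markup changes often, so the CSS class names used above
    # (job_seen_beacon, jobTitle, companyName, ...) may need updating before
    # enabling this source.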
def _fetch_sample_jobs(
self, query: str, location: str, job_type: str
) -> List[Dict[str, Any]]:
"""Generate sample jobs for demo purposes."""
sample_jobs = [
{
"id": "sample_1",
"title": f"Senior {query} Developer",
"company": "TechCorp Solutions",
"location": location or "Remote",
"description": f"We are looking for an experienced {query} developer to join our dynamic team. You will work on cutting-edge projects using modern technologies and best practices.",
"requirements": f"5+ years experience with {query}, strong problem-solving skills, team collaboration",
"salary": "$80,000 - $120,000",
"url": "https://example.com/job1",
"posted_date": datetime.now().isoformat(),
"job_type": job_type or "full-time",
"source": "sample",
},
{
"id": "sample_2",
"title": f"{query} Engineer",
"company": "InnovateLabs",
"location": location or "New York, NY",
"description": f"Join our team as a {query} engineer and help build the next generation of applications. Great opportunity for growth and learning.",
"requirements": f"3+ years {query} experience, bachelor's degree preferred, excellent communication skills",
"salary": "$70,000 - $95,000",
"url": "https://example.com/job2",
"posted_date": (datetime.now() - timedelta(days=1)).isoformat(),
"job_type": job_type or "full-time",
"source": "sample",
},
{
"id": "sample_3",
"title": f"Junior {query} Specialist",
"company": "StartupXYZ",
"location": location or "San Francisco, CA",
"description": f"Great entry-level opportunity for {query} enthusiasts. We offer mentorship, training, and a collaborative environment.",
"requirements": f"1-2 years {query} experience, willingness to learn, passion for technology",
"salary": "$55,000 - $75,000",
"url": "https://example.com/job3",
"posted_date": (datetime.now() - timedelta(days=2)).isoformat(),
"job_type": job_type or "full-time",
"source": "sample",
},
]
return sample_jobs
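    # These sample jobs are always appended as a fallback in _fetch_fresh_jobs,
    # so a search never returns an empty result set even when both external APIs
    # fail or return nothing.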
def _fetch_remotive_jobs(
self, query: str, location: str, job_type: str
) -> List[Dict[str, Any]]:
"""Fetch jobs from the Remotive API."""
jobs = []
try:
params = {"search": query}
if location:
params["location"] = location
if job_type:
params["job_type"] = job_type
print(f"πŸ” Remotive Debug - URL: {self.settings.remotive_api_url}")
print(f"πŸ” Remotive Debug - Params: {params}")
response = requests.get(
self.settings.remotive_api_url, params=params, timeout=30
)
response.raise_for_status()
data = response.json()
remotive_jobs = data.get("jobs", [])
print(f"πŸ” Remotive Debug - Response: {len(remotive_jobs)} jobs found")
for item in remotive_jobs:
job = {
"id": f"remotive_{item.get('id')}",
"title": item.get("title", ""),
"company": item.get("company_name", ""),
"location": item.get("candidate_required_location", ""),
"description": item.get("description", ""),
"requirements": "",
"salary": item.get("salary", ""),
"url": item.get("url", ""),
"posted_date": item.get("publication_date", ""),
"job_type": job_type or item.get("job_type", ""),
"source": "remotive",
}
jobs.append(job)
print(f"πŸ” Remotive Debug - Processed: {len(jobs)} jobs")
except Exception as e:
print(f"πŸ” Remotive Debug - Error: {str(e)}")
logging.error("Error fetching Remotive jobs: %s", e)
return jobs
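    # The fields read above (title, company_name, candidate_required_location,
    # publication_date, ...) follow Remotive's public remote-jobs endpoint, which
    # wraps results in a top-level "jobs" array; only the fields used here are
    # assumed to be present.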
def _fetch_adzuna_jobs(
self, query: str, location: str, job_type: str
) -> List[Dict[str, Any]]:
"""Fetch jobs from the Adzuna API."""
jobs = []
if not self.settings.adzuna_app_id or not self.settings.adzuna_app_key:
return jobs
try:
base_url = f"https://api.adzuna.com/v1/api/jobs/{self.settings.adzuna_country}/search/1"
params = {
"app_id": self.settings.adzuna_app_id,
"app_key": self.settings.adzuna_app_key,
"what": query,
"where": location,
"results_per_page": min(self.settings.max_jobs_per_search, 50),
"content-type": "application/json",
}
response = requests.get(base_url, params=params, timeout=30)
response.raise_for_status()
data = response.json()
for item in data.get("results", []):
job = {
"id": f"adzuna_{item.get('id')}",
"title": item.get("title", ""),
"company": item.get("company", {}).get("display_name", ""),
"location": item.get("location", {}).get("display_name", ""),
"description": item.get("description", ""),
"requirements": item.get("description", ""),
"salary": item.get("salary_is_predicted", ""),
"url": item.get("redirect_url", ""),
"posted_date": item.get("created", ""),
"job_type": job_type or item.get("contract_time", ""),
"source": "adzuna",
}
jobs.append(job)
except Exception as e:
logging.error("Error fetching Adzuna jobs: %s", e)
return jobs
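    # Adzuna is only queried when both adzuna_app_id and adzuna_app_key are set in
    # the settings (see the guard at the top of this method); otherwise this source
    # is silently skipped and the remaining providers are used on their own.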
def search_jobs(
self, user_id: str, query: str = "", location: str = "", job_type: str = ""
) -> Dict[str, Any]:
"""
Search for jobs and rank them by relevance to user profile.
Args:
user_id: User identifier for personalization
query: Job search query/keywords
location: Preferred job location
job_type: Type of job (full-time, contract, remote, etc.)
Returns:
Dict with ranked job listings and fit scores
"""
try:
# Get user profile
profile = self.profile_service.get_profile(user_id)
if not profile:
return {
"success": False,
"message": "User profile not found. Please create a profile first.",
}
# Create cache key
cache_key = f"{query}_{location}_{job_type}"
current_time = datetime.now()
# Check cache (expire after 1 hour)
if cache_key in self.jobs_cache:
cached_data = self.jobs_cache[cache_key]
cache_time = datetime.fromisoformat(cached_data["timestamp"])
if current_time - cache_time < timedelta(hours=1):
fresh_jobs = cached_data["jobs"]
else:
fresh_jobs = self._fetch_fresh_jobs(query, location, job_type)
self.jobs_cache[cache_key] = {
"jobs": fresh_jobs,
"timestamp": current_time.isoformat(),
}
self._save_cache()
else:
fresh_jobs = self._fetch_fresh_jobs(query, location, job_type)
self.jobs_cache[cache_key] = {
"jobs": fresh_jobs,
"timestamp": current_time.isoformat(),
}
self._save_cache()
if not fresh_jobs:
return {
"success": True,
"jobs": [],
"total_found": 0,
"message": "No jobs found for the given criteria",
"search_params": {
"query": query,
"location": location,
"job_type": job_type,
},
}
# Add jobs to embedding index
self.embedding_service.add_job_embeddings(fresh_jobs)
# Create profile text for matching
profile_text = (
f"{' '.join(profile.skills)} {profile.career_goals} {profile.resume}"
)
# Search for similar jobs
similar_jobs = self.embedding_service.search_similar_jobs(
profile_text, k=min(len(fresh_jobs), self.settings.max_jobs_per_search)
)
# Format results with fit scores
ranked_jobs = []
for job_meta, similarity_score in similar_jobs:
# Find the full job data
full_job = next(
(job for job in fresh_jobs if job["id"] == job_meta["job_id"]), None
)
if full_job:
# Calculate fit percentage (similarity score is cosine similarity)
fit_percentage = max(0, min(100, int(similarity_score * 100)))
ranked_job = {
**full_job,
"fit_score": fit_percentage,
"match_reasons": self._get_match_reasons(
profile, full_job, similarity_score
),
}
ranked_jobs.append(ranked_job)
# Sort by fit score
ranked_jobs.sort(key=lambda x: x["fit_score"], reverse=True)
return {
"success": True,
"jobs": ranked_jobs,
"total_found": len(ranked_jobs),
"search_params": {
"query": query,
"location": location,
"job_type": job_type,
},
"user_profile": {
"skills_count": len(profile.skills),
"location": profile.location,
},
}
except Exception as e:
return {"success": False, "message": f"Error searching jobs: {str(e)}"}
def _fetch_fresh_jobs(
self, query: str, location: str, job_type: str
) -> List[Dict[str, Any]]:
"""Fetch fresh jobs from multiple sources."""
all_jobs: List[Dict[str, Any]] = []
# Remotive API
remotive_jobs = self._fetch_remotive_jobs(query, location, job_type)
all_jobs.extend(remotive_jobs)
# Adzuna API
adzuna_jobs = self._fetch_adzuna_jobs(query, location, job_type)
all_jobs.extend(adzuna_jobs)
# Sample jobs as fallback
sample_jobs = self._fetch_sample_jobs(query, location, job_type)
all_jobs.extend(sample_jobs)
return all_jobs
def _get_match_reasons(
self, profile: UserProfile, job: Dict[str, Any], similarity_score: float
) -> List[str]:
"""Generate reasons why this job matches the user profile."""
reasons = []
# Check skill matches
job_text = f"{job['title']} {job['description']} {job['requirements']}".lower()
matching_skills = [
skill for skill in profile.skills if skill.lower() in job_text
]
if matching_skills:
reasons.append(f"Skills match: {', '.join(matching_skills[:3])}")
# Check location preference
if profile.location and profile.location.lower() in job["location"].lower():
reasons.append("Location preference match")
        # Overall relevance based on embedding similarity
if similarity_score > 0.8:
reasons.append("High relevance to your background")
elif similarity_score > 0.6:
reasons.append("Good match for your experience")
# Check career goals alignment
if any(
goal_word in job_text
for goal_word in profile.career_goals.lower().split()[:5]
):
reasons.append("Aligns with career goals")
return reasons[:3] # Limit to top 3 reasons
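

if __name__ == "__main__":
    # Minimal manual smoke test (a sketch, not part of the MCP server flow).
    # Assumptions: the package is importable so the relative imports above resolve
    # (e.g. run as `python -m src.services.job_search_service` from the repo root,
    # module path assumed), and a profile already exists for the hypothetical
    # user id "demo-user".
    service = JobSearchService()
    results = service.search_jobs(
        user_id="demo-user", query="python", location="Remote", job_type="full-time"
    )
    if results["success"]:
        print(f"Found {results['total_found']} jobs")
        for job in results["jobs"][:5]:
            print(f"  {job['fit_score']:>3}%  {job['title']} @ {job['company']}")
    else:
        print(results["message"])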