🔍 Add debug logging to JobSearchService for Remotive API responses, including URL, parameters, job count, and error handling.
9e0d988
"""Job search service for fetching and ranking job posts.""" | |
import hashlib
import json
import logging
import os
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup

from ..config import get_settings
from .embedding_service import EmbeddingService
from .profile_service import ProfileService, UserProfile
logger = logging.getLogger(__name__)


class JobSearchService:
    """Service for searching and ranking job posts.

    Aggregates listings from multiple sources (Remotive API, Adzuna API,
    Indeed scraping, built-in samples), caches raw results on disk for one
    hour, and ranks them against a user's profile via embedding similarity.
    """

    def __init__(self):
        self.settings = get_settings()
        self.embedding_service = EmbeddingService()
        self.profile_service = ProfileService()
        # Maps "query_location_jobtype" -> {"jobs": [...], "timestamp": ISO string}
        self.jobs_cache: Dict[str, Any] = {}
        self._load_cache()

    def _load_cache(self) -> None:
        """Load the jobs cache from disk; missing or corrupt files yield an empty cache."""
        try:
            if os.path.exists(self.settings.jobs_cache_path):
                with open(self.settings.jobs_cache_path, "r", encoding="utf-8") as f:
                    self.jobs_cache = json.load(f)
        except Exception as e:
            # Cache is best-effort: never let a bad file break the service.
            logger.error("Error loading jobs cache: %s", e)
            self.jobs_cache = {}

    def _save_cache(self) -> None:
        """Persist the jobs cache to disk (best-effort; failures are logged, not raised)."""
        try:
            os.makedirs(os.path.dirname(self.settings.jobs_cache_path), exist_ok=True)
            with open(self.settings.jobs_cache_path, "w", encoding="utf-8") as f:
                # default=str stringifies any non-JSON-native values (e.g. datetimes).
                json.dump(self.jobs_cache, f, indent=2, default=str)
        except Exception as e:
            logger.error("Error saving jobs cache: %s", e)

    def _fetch_indeed_jobs(
        self, query: str, location: str, job_type: str
    ) -> List[Dict[str, Any]]:
        """Fetch jobs from Indeed (web scraping - for demo purposes).

        Returns an empty list on any network/parsing failure; individual
        malformed job cards are skipped rather than aborting the scrape.
        """
        jobs: List[Dict[str, Any]] = []
        try:
            base_url = "https://www.indeed.com/jobs"
            params = {
                "q": query,
                "l": location,
                # Indeed only understands these three job-type filter values.
                "jt": job_type
                if job_type in ["fulltime", "parttime", "contract"]
                else "",
                "limit": min(self.settings.max_jobs_per_search, 50),
            }
            url = f"{base_url}?" + "&".join(
                f"{k}={quote(str(v))}" for k, v in params.items() if v
            )
            # Browser-like UA; Indeed rejects obvious bot requests.
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            }
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, "html.parser")
            # NOTE(review): Indeed's markup changes often; these selectors may rot.
            job_cards = soup.find_all("div", class_="job_seen_beacon")
            for i, card in enumerate(job_cards[: self.settings.max_jobs_per_search]):
                try:
                    title_elem = card.find("h2", class_="jobTitle")
                    title = (
                        title_elem.get_text(strip=True)
                        if title_elem
                        else f"Position {i + 1}"
                    )
                    company_elem = card.find("span", class_="companyName")
                    company = (
                        company_elem.get_text(strip=True)
                        if company_elem
                        else "Unknown Company"
                    )
                    location_elem = card.find("div", class_="companyLocation")
                    job_location = (
                        location_elem.get_text(strip=True)
                        if location_elem
                        else location
                    )
                    summary_elem = card.find("div", class_="job-snippet")
                    summary = summary_elem.get_text(strip=True) if summary_elem else ""
                    link_elem = title_elem.find("a") if title_elem else None
                    job_url = (
                        f"https://www.indeed.com{link_elem['href']}"
                        if link_elem and link_elem.get("href")
                        else ""
                    )
                    # Builtin hash() is salted per process (PYTHONHASHSEED), so
                    # IDs built from it change across restarts and break cache /
                    # embedding lookups. Use a stable content digest instead.
                    digest = hashlib.md5(
                        f"{title}|{company}".encode("utf-8")
                    ).hexdigest()[:10]
                    jobs.append(
                        {
                            "id": f"indeed_{i}_{digest}",
                            "title": title,
                            "company": company,
                            "location": job_location,
                            "description": summary,
                            "requirements": "",  # Would need individual job page scraping
                            "salary": "",  # Would need more detailed scraping
                            "url": job_url,
                            "posted_date": datetime.now().isoformat(),
                            "job_type": job_type or "full-time",
                            "source": "indeed",
                        }
                    )
                except Exception as e:
                    logger.warning("Error parsing job card %d: %s", i, e)
                    continue
        except Exception as e:
            logger.error("Error fetching Indeed jobs: %s", e)
        return jobs

    def _fetch_sample_jobs(
        self, query: str, location: str, job_type: str
    ) -> List[Dict[str, Any]]:
        """Generate sample jobs for demo purposes.

        Always returns three deterministic listings built from the search
        params, so the pipeline has data even when live sources fail.
        """
        sample_jobs = [
            {
                "id": "sample_1",
                "title": f"Senior {query} Developer",
                "company": "TechCorp Solutions",
                "location": location or "Remote",
                "description": f"We are looking for an experienced {query} developer to join our dynamic team. You will work on cutting-edge projects using modern technologies and best practices.",
                "requirements": f"5+ years experience with {query}, strong problem-solving skills, team collaboration",
                "salary": "$80,000 - $120,000",
                "url": "https://example.com/job1",
                "posted_date": datetime.now().isoformat(),
                "job_type": job_type or "full-time",
                "source": "sample",
            },
            {
                "id": "sample_2",
                "title": f"{query} Engineer",
                "company": "InnovateLabs",
                "location": location or "New York, NY",
                "description": f"Join our team as a {query} engineer and help build the next generation of applications. Great opportunity for growth and learning.",
                "requirements": f"3+ years {query} experience, bachelor's degree preferred, excellent communication skills",
                "salary": "$70,000 - $95,000",
                "url": "https://example.com/job2",
                "posted_date": (datetime.now() - timedelta(days=1)).isoformat(),
                "job_type": job_type or "full-time",
                "source": "sample",
            },
            {
                "id": "sample_3",
                "title": f"Junior {query} Specialist",
                "company": "StartupXYZ",
                "location": location or "San Francisco, CA",
                "description": f"Great entry-level opportunity for {query} enthusiasts. We offer mentorship, training, and a collaborative environment.",
                "requirements": f"1-2 years {query} experience, willingness to learn, passion for technology",
                "salary": "$55,000 - $75,000",
                "url": "https://example.com/job3",
                "posted_date": (datetime.now() - timedelta(days=2)).isoformat(),
                "job_type": job_type or "full-time",
                "source": "sample",
            },
        ]
        return sample_jobs

    def _fetch_remotive_jobs(
        self, query: str, location: str, job_type: str
    ) -> List[Dict[str, Any]]:
        """Fetch jobs from the Remotive API.

        Logs the request URL/params and response job counts at DEBUG level;
        any failure is logged and an empty list returned.
        """
        jobs: List[Dict[str, Any]] = []
        try:
            params: Dict[str, str] = {"search": query}
            if location:
                params["location"] = location
            if job_type:
                params["job_type"] = job_type
            # Debug logging goes through the logging module (not print) so
            # verbosity is controlled by the logging configuration.
            logger.debug(
                "Remotive request: url=%s params=%s",
                self.settings.remotive_api_url,
                params,
            )
            response = requests.get(
                self.settings.remotive_api_url, params=params, timeout=30
            )
            response.raise_for_status()
            remotive_jobs = response.json().get("jobs", [])
            logger.debug("Remotive response: %d jobs found", len(remotive_jobs))
            for item in remotive_jobs:
                jobs.append(
                    {
                        "id": f"remotive_{item.get('id')}",
                        "title": item.get("title", ""),
                        "company": item.get("company_name", ""),
                        "location": item.get("candidate_required_location", ""),
                        "description": item.get("description", ""),
                        "requirements": "",
                        "salary": item.get("salary", ""),
                        "url": item.get("url", ""),
                        "posted_date": item.get("publication_date", ""),
                        # Caller's job_type filter wins over the API's value.
                        "job_type": job_type or item.get("job_type", ""),
                        "source": "remotive",
                    }
                )
            logger.debug("Remotive processed: %d jobs", len(jobs))
        except Exception as e:
            logger.error("Error fetching Remotive jobs: %s", e)
        return jobs

    def _fetch_adzuna_jobs(
        self, query: str, location: str, job_type: str
    ) -> List[Dict[str, Any]]:
        """Fetch jobs from the Adzuna API.

        Returns an empty list when API credentials are not configured or
        when the request fails.
        """
        jobs: List[Dict[str, Any]] = []
        if not self.settings.adzuna_app_id or not self.settings.adzuna_app_key:
            return jobs
        try:
            base_url = f"https://api.adzuna.com/v1/api/jobs/{self.settings.adzuna_country}/search/1"
            params = {
                "app_id": self.settings.adzuna_app_id,
                "app_key": self.settings.adzuna_app_key,
                "what": query,
                "where": location,
                "results_per_page": min(self.settings.max_jobs_per_search, 50),
                "content-type": "application/json",
            }
            response = requests.get(base_url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()
            for item in data.get("results", []):
                # Adzuna reports salaries as numeric salary_min/salary_max;
                # salary_is_predicted is just a 0/1 flag, not a salary value.
                salary_min = item.get("salary_min")
                salary_max = item.get("salary_max")
                if salary_min and salary_max:
                    salary = f"{salary_min} - {salary_max}"
                elif salary_min or salary_max:
                    salary = str(salary_min or salary_max)
                else:
                    salary = ""
                jobs.append(
                    {
                        "id": f"adzuna_{item.get('id')}",
                        "title": item.get("title", ""),
                        "company": item.get("company", {}).get("display_name", ""),
                        "location": item.get("location", {}).get("display_name", ""),
                        "description": item.get("description", ""),
                        "requirements": item.get("description", ""),
                        "salary": salary,
                        "url": item.get("redirect_url", ""),
                        "posted_date": item.get("created", ""),
                        "job_type": job_type or item.get("contract_time", ""),
                        "source": "adzuna",
                    }
                )
        except Exception as e:
            logger.error("Error fetching Adzuna jobs: %s", e)
        return jobs

    def search_jobs(
        self, user_id: str, query: str = "", location: str = "", job_type: str = ""
    ) -> Dict[str, Any]:
        """
        Search for jobs and rank them by relevance to user profile.
        Args:
            user_id: User identifier for personalization
            query: Job search query/keywords
            location: Preferred job location
            job_type: Type of job (full-time, contract, remote, etc.)
        Returns:
            Dict with ranked job listings and fit scores
        """
        try:
            profile = self.profile_service.get_profile(user_id)
            if not profile:
                return {
                    "success": False,
                    "message": "User profile not found. Please create a profile first.",
                }
            fresh_jobs = self._get_or_fetch_jobs(query, location, job_type)
            if not fresh_jobs:
                return {
                    "success": True,
                    "jobs": [],
                    "total_found": 0,
                    "message": "No jobs found for the given criteria",
                    "search_params": {
                        "query": query,
                        "location": location,
                        "job_type": job_type,
                    },
                }
            # Index the fetched jobs so similarity search can see them.
            self.embedding_service.add_job_embeddings(fresh_jobs)
            # Profile text used as the similarity query against job embeddings.
            profile_text = (
                f"{' '.join(profile.skills)} {profile.career_goals} {profile.resume}"
            )
            similar_jobs = self.embedding_service.search_similar_jobs(
                profile_text, k=min(len(fresh_jobs), self.settings.max_jobs_per_search)
            )
            # O(1) lookup instead of scanning fresh_jobs per result.
            jobs_by_id = {job["id"]: job for job in fresh_jobs}
            ranked_jobs = []
            for job_meta, similarity_score in similar_jobs:
                full_job = jobs_by_id.get(job_meta["job_id"])
                if full_job:
                    # Cosine similarity -> percentage, clamped to [0, 100].
                    fit_percentage = max(0, min(100, int(similarity_score * 100)))
                    ranked_jobs.append(
                        {
                            **full_job,
                            "fit_score": fit_percentage,
                            "match_reasons": self._get_match_reasons(
                                profile, full_job, similarity_score
                            ),
                        }
                    )
            ranked_jobs.sort(key=lambda job: job["fit_score"], reverse=True)
            return {
                "success": True,
                "jobs": ranked_jobs,
                "total_found": len(ranked_jobs),
                "search_params": {
                    "query": query,
                    "location": location,
                    "job_type": job_type,
                },
                "user_profile": {
                    "skills_count": len(profile.skills),
                    "location": profile.location,
                },
            }
        except Exception as e:
            logger.exception("Error searching jobs")
            return {"success": False, "message": f"Error searching jobs: {str(e)}"}

    def _get_or_fetch_jobs(
        self, query: str, location: str, job_type: str
    ) -> List[Dict[str, Any]]:
        """Return jobs for the search params, reusing cached results under 1 hour old.

        Fresh fetches are stored back into the cache and persisted to disk.
        """
        cache_key = f"{query}_{location}_{job_type}"
        current_time = datetime.now()
        cached = self.jobs_cache.get(cache_key)
        if cached:
            try:
                cache_time = datetime.fromisoformat(cached["timestamp"])
                if current_time - cache_time < timedelta(hours=1):
                    return cached["jobs"]
            except (KeyError, TypeError, ValueError):
                # Malformed cache entry: fall through and refetch.
                pass
        fresh_jobs = self._fetch_fresh_jobs(query, location, job_type)
        self.jobs_cache[cache_key] = {
            "jobs": fresh_jobs,
            "timestamp": current_time.isoformat(),
        }
        self._save_cache()
        return fresh_jobs

    def _fetch_fresh_jobs(
        self, query: str, location: str, job_type: str
    ) -> List[Dict[str, Any]]:
        """Fetch fresh jobs from multiple sources.

        Sources are additive: Remotive, then Adzuna, then samples as fallback.
        """
        all_jobs: List[Dict[str, Any]] = []
        all_jobs.extend(self._fetch_remotive_jobs(query, location, job_type))
        all_jobs.extend(self._fetch_adzuna_jobs(query, location, job_type))
        all_jobs.extend(self._fetch_sample_jobs(query, location, job_type))
        return all_jobs

    def _get_match_reasons(
        self, profile: "UserProfile", job: Dict[str, Any], similarity_score: float
    ) -> List[str]:
        """Generate up to three reasons why this job matches the user profile."""
        reasons = []
        # Case-insensitive skill matching against title + description + requirements.
        job_text = f"{job['title']} {job['description']} {job['requirements']}".lower()
        matching_skills = [
            skill for skill in profile.skills if skill.lower() in job_text
        ]
        if matching_skills:
            reasons.append(f"Skills match: {', '.join(matching_skills[:3])}")
        if profile.location and profile.location.lower() in job["location"].lower():
            reasons.append("Location preference match")
        # Thresholds chosen for cosine similarity in [0, 1].
        if similarity_score > 0.8:
            reasons.append("High relevance to your background")
        elif similarity_score > 0.6:
            reasons.append("Good match for your experience")
        # Crude career-goal alignment: any of the first five goal words present.
        if any(
            goal_word in job_text
            for goal_word in profile.career_goals.lower().split()[:5]
        ):
            reasons.append("Aligns with career goals")
        return reasons[:3]  # Limit to top 3 reasons