🔍 Add debug logging to JobSearchService for Remotive API responses, including URL, parameters, job count, and error handling.
9e0d988
"""Job search service for fetching and ranking job posts.""" | |
import hashlib
import json
import logging
import os
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup

from ..config import get_settings
from .embedding_service import EmbeddingService
from .profile_service import ProfileService, UserProfile
logger = logging.getLogger(__name__)


class JobSearchService:
    """Service for searching and ranking job posts.

    Aggregates listings from multiple sources (Remotive API, Adzuna API,
    Indeed scraping, built-in samples), caches raw results on disk for one
    hour, and ranks them against a user's profile via embedding similarity.
    """

    def __init__(self):
        self.settings = get_settings()
        self.embedding_service = EmbeddingService()
        self.profile_service = ProfileService()
        # Maps "query_location_jobtype" -> {"jobs": [...], "timestamp": ISO string}
        self.jobs_cache: Dict[str, Any] = {}
        self._load_cache()

    def _load_cache(self) -> None:
        """Load the jobs cache from disk; missing or corrupt files yield an empty cache."""
        try:
            if os.path.exists(self.settings.jobs_cache_path):
                with open(self.settings.jobs_cache_path, "r", encoding="utf-8") as f:
                    self.jobs_cache = json.load(f)
        except Exception as e:
            # Cache is best-effort: never let a bad file break the service.
            logger.error("Error loading jobs cache: %s", e)
            self.jobs_cache = {}

    def _save_cache(self) -> None:
        """Persist the jobs cache to disk (best-effort; failures are logged, not raised)."""
        try:
            os.makedirs(os.path.dirname(self.settings.jobs_cache_path), exist_ok=True)
            with open(self.settings.jobs_cache_path, "w", encoding="utf-8") as f:
                # default=str stringifies any non-JSON-native values (e.g. datetimes).
                json.dump(self.jobs_cache, f, indent=2, default=str)
        except Exception as e:
            logger.error("Error saving jobs cache: %s", e)

    def _fetch_indeed_jobs(
        self, query: str, location: str, job_type: str
    ) -> List[Dict[str, Any]]:
        """Fetch jobs from Indeed (web scraping - for demo purposes).

        Returns an empty list on any network/parsing failure; individual
        malformed job cards are skipped rather than aborting the scrape.
        """
        jobs: List[Dict[str, Any]] = []
        try:
            base_url = "https://www.indeed.com/jobs"
            params = {
                "q": query,
                "l": location,
                # Indeed only understands these three job-type filter values.
                "jt": job_type
                if job_type in ["fulltime", "parttime", "contract"]
                else "",
                "limit": min(self.settings.max_jobs_per_search, 50),
            }
            url = f"{base_url}?" + "&".join(
                f"{k}={quote(str(v))}" for k, v in params.items() if v
            )
            # Browser-like UA; Indeed rejects obvious bot requests.
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            }
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, "html.parser")
            # NOTE(review): Indeed's markup changes often; these selectors may rot.
            job_cards = soup.find_all("div", class_="job_seen_beacon")
            for i, card in enumerate(job_cards[: self.settings.max_jobs_per_search]):
                try:
                    title_elem = card.find("h2", class_="jobTitle")
                    title = (
                        title_elem.get_text(strip=True)
                        if title_elem
                        else f"Position {i + 1}"
                    )
                    company_elem = card.find("span", class_="companyName")
                    company = (
                        company_elem.get_text(strip=True)
                        if company_elem
                        else "Unknown Company"
                    )
                    location_elem = card.find("div", class_="companyLocation")
                    job_location = (
                        location_elem.get_text(strip=True)
                        if location_elem
                        else location
                    )
                    summary_elem = card.find("div", class_="job-snippet")
                    summary = summary_elem.get_text(strip=True) if summary_elem else ""
                    link_elem = title_elem.find("a") if title_elem else None
                    job_url = (
                        f"https://www.indeed.com{link_elem['href']}"
                        if link_elem and link_elem.get("href")
                        else ""
                    )
                    # Builtin hash() is salted per process (PYTHONHASHSEED), so
                    # IDs built from it change across restarts and break cache /
                    # embedding lookups. Use a stable content digest instead.
                    digest = hashlib.md5(
                        f"{title}|{company}".encode("utf-8")
                    ).hexdigest()[:10]
                    jobs.append(
                        {
                            "id": f"indeed_{i}_{digest}",
                            "title": title,
                            "company": company,
                            "location": job_location,
                            "description": summary,
                            "requirements": "",  # Would need individual job page scraping
                            "salary": "",  # Would need more detailed scraping
                            "url": job_url,
                            "posted_date": datetime.now().isoformat(),
                            "job_type": job_type or "full-time",
                            "source": "indeed",
                        }
                    )
                except Exception as e:
                    logger.warning("Error parsing job card %d: %s", i, e)
                    continue
        except Exception as e:
            logger.error("Error fetching Indeed jobs: %s", e)
        return jobs

    def _fetch_sample_jobs(
        self, query: str, location: str, job_type: str
    ) -> List[Dict[str, Any]]:
        """Generate sample jobs for demo purposes.

        Always returns three deterministic listings built from the search
        params, so the pipeline has data even when live sources fail.
        """
        sample_jobs = [
            {
                "id": "sample_1",
                "title": f"Senior {query} Developer",
                "company": "TechCorp Solutions",
                "location": location or "Remote",
                "description": f"We are looking for an experienced {query} developer to join our dynamic team. You will work on cutting-edge projects using modern technologies and best practices.",
                "requirements": f"5+ years experience with {query}, strong problem-solving skills, team collaboration",
                "salary": "$80,000 - $120,000",
                "url": "https://example.com/job1",
                "posted_date": datetime.now().isoformat(),
                "job_type": job_type or "full-time",
                "source": "sample",
            },
            {
                "id": "sample_2",
                "title": f"{query} Engineer",
                "company": "InnovateLabs",
                "location": location or "New York, NY",
                "description": f"Join our team as a {query} engineer and help build the next generation of applications. Great opportunity for growth and learning.",
                "requirements": f"3+ years {query} experience, bachelor's degree preferred, excellent communication skills",
                "salary": "$70,000 - $95,000",
                "url": "https://example.com/job2",
                "posted_date": (datetime.now() - timedelta(days=1)).isoformat(),
                "job_type": job_type or "full-time",
                "source": "sample",
            },
            {
                "id": "sample_3",
                "title": f"Junior {query} Specialist",
                "company": "StartupXYZ",
                "location": location or "San Francisco, CA",
                "description": f"Great entry-level opportunity for {query} enthusiasts. We offer mentorship, training, and a collaborative environment.",
                "requirements": f"1-2 years {query} experience, willingness to learn, passion for technology",
                "salary": "$55,000 - $75,000",
                "url": "https://example.com/job3",
                "posted_date": (datetime.now() - timedelta(days=2)).isoformat(),
                "job_type": job_type or "full-time",
                "source": "sample",
            },
        ]
        return sample_jobs

    def _fetch_remotive_jobs(
        self, query: str, location: str, job_type: str
    ) -> List[Dict[str, Any]]:
        """Fetch jobs from the Remotive API.

        Logs the request URL/params and response job counts at DEBUG level;
        any failure is logged and an empty list returned.
        """
        jobs: List[Dict[str, Any]] = []
        try:
            params: Dict[str, str] = {"search": query}
            if location:
                params["location"] = location
            if job_type:
                params["job_type"] = job_type
            # Debug logging goes through the logging module (not print) so
            # verbosity is controlled by the logging configuration.
            logger.debug(
                "Remotive request: url=%s params=%s",
                self.settings.remotive_api_url,
                params,
            )
            response = requests.get(
                self.settings.remotive_api_url, params=params, timeout=30
            )
            response.raise_for_status()
            remotive_jobs = response.json().get("jobs", [])
            logger.debug("Remotive response: %d jobs found", len(remotive_jobs))
            for item in remotive_jobs:
                jobs.append(
                    {
                        "id": f"remotive_{item.get('id')}",
                        "title": item.get("title", ""),
                        "company": item.get("company_name", ""),
                        "location": item.get("candidate_required_location", ""),
                        "description": item.get("description", ""),
                        "requirements": "",
                        "salary": item.get("salary", ""),
                        "url": item.get("url", ""),
                        "posted_date": item.get("publication_date", ""),
                        # Caller's job_type filter wins over the API's value.
                        "job_type": job_type or item.get("job_type", ""),
                        "source": "remotive",
                    }
                )
            logger.debug("Remotive processed: %d jobs", len(jobs))
        except Exception as e:
            logger.error("Error fetching Remotive jobs: %s", e)
        return jobs

    def _fetch_adzuna_jobs(
        self, query: str, location: str, job_type: str
    ) -> List[Dict[str, Any]]:
        """Fetch jobs from the Adzuna API.

        Returns an empty list when API credentials are not configured or
        when the request fails.
        """
        jobs: List[Dict[str, Any]] = []
        if not self.settings.adzuna_app_id or not self.settings.adzuna_app_key:
            return jobs
        try:
            base_url = f"https://api.adzuna.com/v1/api/jobs/{self.settings.adzuna_country}/search/1"
            params = {
                "app_id": self.settings.adzuna_app_id,
                "app_key": self.settings.adzuna_app_key,
                "what": query,
                "where": location,
                "results_per_page": min(self.settings.max_jobs_per_search, 50),
                "content-type": "application/json",
            }
            response = requests.get(base_url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()
            for item in data.get("results", []):
                # Adzuna reports salaries as numeric salary_min/salary_max;
                # salary_is_predicted is just a 0/1 flag, not a salary value.
                salary_min = item.get("salary_min")
                salary_max = item.get("salary_max")
                if salary_min and salary_max:
                    salary = f"{salary_min} - {salary_max}"
                elif salary_min or salary_max:
                    salary = str(salary_min or salary_max)
                else:
                    salary = ""
                jobs.append(
                    {
                        "id": f"adzuna_{item.get('id')}",
                        "title": item.get("title", ""),
                        "company": item.get("company", {}).get("display_name", ""),
                        "location": item.get("location", {}).get("display_name", ""),
                        "description": item.get("description", ""),
                        "requirements": item.get("description", ""),
                        "salary": salary,
                        "url": item.get("redirect_url", ""),
                        "posted_date": item.get("created", ""),
                        "job_type": job_type or item.get("contract_time", ""),
                        "source": "adzuna",
                    }
                )
        except Exception as e:
            logger.error("Error fetching Adzuna jobs: %s", e)
        return jobs

    def search_jobs(
        self, user_id: str, query: str = "", location: str = "", job_type: str = ""
    ) -> Dict[str, Any]:
        """
        Search for jobs and rank them by relevance to user profile.
        Args:
            user_id: User identifier for personalization
            query: Job search query/keywords
            location: Preferred job location
            job_type: Type of job (full-time, contract, remote, etc.)
        Returns:
            Dict with ranked job listings and fit scores
        """
        try:
            profile = self.profile_service.get_profile(user_id)
            if not profile:
                return {
                    "success": False,
                    "message": "User profile not found. Please create a profile first.",
                }
            fresh_jobs = self._get_or_fetch_jobs(query, location, job_type)
            if not fresh_jobs:
                return {
                    "success": True,
                    "jobs": [],
                    "total_found": 0,
                    "message": "No jobs found for the given criteria",
                    "search_params": {
                        "query": query,
                        "location": location,
                        "job_type": job_type,
                    },
                }
            # Index the fetched jobs so similarity search can see them.
            self.embedding_service.add_job_embeddings(fresh_jobs)
            # Profile text used as the similarity query against job embeddings.
            profile_text = (
                f"{' '.join(profile.skills)} {profile.career_goals} {profile.resume}"
            )
            similar_jobs = self.embedding_service.search_similar_jobs(
                profile_text, k=min(len(fresh_jobs), self.settings.max_jobs_per_search)
            )
            # O(1) lookup instead of scanning fresh_jobs per result.
            jobs_by_id = {job["id"]: job for job in fresh_jobs}
            ranked_jobs = []
            for job_meta, similarity_score in similar_jobs:
                full_job = jobs_by_id.get(job_meta["job_id"])
                if full_job:
                    # Cosine similarity -> percentage, clamped to [0, 100].
                    fit_percentage = max(0, min(100, int(similarity_score * 100)))
                    ranked_jobs.append(
                        {
                            **full_job,
                            "fit_score": fit_percentage,
                            "match_reasons": self._get_match_reasons(
                                profile, full_job, similarity_score
                            ),
                        }
                    )
            ranked_jobs.sort(key=lambda job: job["fit_score"], reverse=True)
            return {
                "success": True,
                "jobs": ranked_jobs,
                "total_found": len(ranked_jobs),
                "search_params": {
                    "query": query,
                    "location": location,
                    "job_type": job_type,
                },
                "user_profile": {
                    "skills_count": len(profile.skills),
                    "location": profile.location,
                },
            }
        except Exception as e:
            logger.exception("Error searching jobs")
            return {"success": False, "message": f"Error searching jobs: {str(e)}"}

    def _get_or_fetch_jobs(
        self, query: str, location: str, job_type: str
    ) -> List[Dict[str, Any]]:
        """Return jobs for the search params, reusing cached results under 1 hour old.

        Fresh fetches are stored back into the cache and persisted to disk.
        """
        cache_key = f"{query}_{location}_{job_type}"
        current_time = datetime.now()
        cached = self.jobs_cache.get(cache_key)
        if cached:
            try:
                cache_time = datetime.fromisoformat(cached["timestamp"])
                if current_time - cache_time < timedelta(hours=1):
                    return cached["jobs"]
            except (KeyError, TypeError, ValueError):
                # Malformed cache entry: fall through and refetch.
                pass
        fresh_jobs = self._fetch_fresh_jobs(query, location, job_type)
        self.jobs_cache[cache_key] = {
            "jobs": fresh_jobs,
            "timestamp": current_time.isoformat(),
        }
        self._save_cache()
        return fresh_jobs

    def _fetch_fresh_jobs(
        self, query: str, location: str, job_type: str
    ) -> List[Dict[str, Any]]:
        """Fetch fresh jobs from multiple sources.

        Sources are additive: Remotive, then Adzuna, then samples as fallback.
        """
        all_jobs: List[Dict[str, Any]] = []
        all_jobs.extend(self._fetch_remotive_jobs(query, location, job_type))
        all_jobs.extend(self._fetch_adzuna_jobs(query, location, job_type))
        all_jobs.extend(self._fetch_sample_jobs(query, location, job_type))
        return all_jobs

    def _get_match_reasons(
        self, profile: "UserProfile", job: Dict[str, Any], similarity_score: float
    ) -> List[str]:
        """Generate up to three reasons why this job matches the user profile."""
        reasons = []
        # Case-insensitive skill matching against title + description + requirements.
        job_text = f"{job['title']} {job['description']} {job['requirements']}".lower()
        matching_skills = [
            skill for skill in profile.skills if skill.lower() in job_text
        ]
        if matching_skills:
            reasons.append(f"Skills match: {', '.join(matching_skills[:3])}")
        if profile.location and profile.location.lower() in job["location"].lower():
            reasons.append("Location preference match")
        # Thresholds chosen for cosine similarity in [0, 1].
        if similarity_score > 0.8:
            reasons.append("High relevance to your background")
        elif similarity_score > 0.6:
            reasons.append("Good match for your experience")
        # Crude career-goal alignment: any of the first five goal words present.
        if any(
            goal_word in job_text
            for goal_word in profile.career_goals.lower().split()[:5]
        ):
            reasons.append("Aligns with career goals")
        return reasons[:3]  # Limit to top 3 reasons