import random
import time

import pandas as pd
import requests
import streamlit as st
from bs4 import BeautifulSoup


def scrape_linkedin_jobs(keyword, location, num_pages=1):
    base_url = "https://www.linkedin.com/jobs/search/"
    jobs = []
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        )
    }

    for page in range(num_pages):
        # LinkedIn paginates search results 25 at a time via the `start` parameter.
        params = {
            "keywords": keyword,
            "location": location,
            "start": page * 25,
        }

        try:
            response = requests.get(base_url, params=params, headers=headers, timeout=10)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, "html.parser")
            # Match on the single `base-search-card` class rather than the full
            # space-separated class string, which breaks as soon as LinkedIn
            # reorders or adds utility classes.
            job_cards = soup.find_all("div", class_="base-search-card")

            if not job_cards:
                st.warning(
                    f"No job cards found on page {page + 1}. "
                    "The page structure might have changed."
                )
                continue

            for card in job_cards:
                title = card.find("h3", class_="base-search-card__title")
                company = card.find("h4", class_="base-search-card__subtitle")
                # Use a distinct name here: reassigning `location` would shadow the
                # function parameter and corrupt the query params for later pages.
                job_location = card.find("span", class_="job-search-card__location")
                link = card.find("a", class_="base-card__full-link")

                if title and company and job_location and link:
                    jobs.append({
                        "Title": title.text.strip(),
                        "Company": company.text.strip(),
                        "Location": job_location.text.strip(),
                        "Link": link["href"],
                    })

            time.sleep(random.uniform(1, 3))  # Random delay between requests

        except requests.RequestException as e:
            st.error(f"An error occurred while fetching page {page + 1}: {str(e)}")
            break

    return jobs


st.title("LinkedIn Job Scraper")

keyword = st.text_input("Enter job keyword:")
location = st.text_input("Enter location:")
num_pages = st.number_input("Number of pages to scrape:", min_value=1, max_value=10, value=1)

if st.button("Scrape Jobs"):
    if keyword and location:
        with st.spinner("Scraping jobs... This may take a moment."):
            jobs = scrape_linkedin_jobs(keyword, location, num_pages)

        if jobs:
            df = pd.DataFrame(jobs)
            st.success(f"Found {len(jobs)} jobs!")
            st.dataframe(df)

            csv = df.to_csv(index=False)
            st.download_button(
                label="Download CSV",
                data=csv,
                file_name="linkedin_jobs.csv",
                mime="text/csv",
            )
        else:
            st.warning("No jobs found. Try different keywords or location.")
    else:
        st.warning("Please enter both keyword and location.")

st.markdown("---")
st.markdown("Note: This scraper is for educational purposes only. Please respect LinkedIn's terms of service.")
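
# Usage sketch (assumptions: the file name below is hypothetical, and the
# dependencies are installed, e.g. `pip install streamlit requests beautifulsoup4 pandas`):
#
#     streamlit run linkedin_job_scraper.py
#
# Streamlit serves the app locally and opens it in the browser; the scrape runs
# when the "Scrape Jobs" button is clicked.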