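"""LinkedIn Job Scraper: a small Streamlit app that fetches public LinkedIn
job-search result pages with requests and BeautifulSoup, then offers the
parsed listings as a CSV download.

Run locally with (assuming the file is saved as app.py; the source does not
specify a filename):

    streamlit run app.py
"""
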
import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random


def scrape_linkedin_jobs(keyword, location, num_pages=1):
    """Scrape LinkedIn job-search results for a keyword/location pair.

    Returns a list of dicts with Title, Company, Location, and Link keys.
    """
    base_url = "https://www.linkedin.com/jobs/search/"
    jobs = []
    headers = {
        # A browser-like User-Agent; LinkedIn tends to reject the default
        # python-requests agent string.
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    for page in range(num_pages):
        # LinkedIn paginates results 25 at a time via the "start" offset.
        params = {
            "keywords": keyword,
            "location": location,
            "start": page * 25
        }
        try:
            response = requests.get(base_url, params=params, headers=headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')

            # Matches the exact class attribute string of a job card; this is
            # brittle and will break if LinkedIn changes its markup.
            job_cards = soup.find_all('div', class_='base-card relative w-full hover:no-underline focus:no-underline base-card--link base-search-card base-search-card--link job-search-card')
            if not job_cards:
                st.warning(f"No job cards found on page {page + 1}. The page structure might have changed.")
                continue

            for card in job_cards:
                title = card.find('h3', class_='base-search-card__title')
                company = card.find('h4', class_='base-search-card__subtitle')
                # Named job_location so it does not shadow the location
                # parameter, which would corrupt the query on later pages.
                job_location = card.find('span', class_='job-search-card__location')
                link = card.find('a', class_='base-card__full-link')
                if title and company and job_location and link:
                    jobs.append({
                        'Title': title.text.strip(),
                        'Company': company.text.strip(),
                        'Location': job_location.text.strip(),
                        'Link': link['href']
                    })

            time.sleep(random.uniform(1, 3))  # Random delay between requests
        except requests.RequestException as e:
            st.error(f"An error occurred while fetching page {page + 1}: {str(e)}")
            break

    return jobs
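
# Streamlit UI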
st.title("LinkedIn Job Scraper")

keyword = st.text_input("Enter job keyword:")
location = st.text_input("Enter location:")
num_pages = st.number_input("Number of pages to scrape:", min_value=1, max_value=10, value=1)

if st.button("Scrape Jobs"):
    if keyword and location:
        with st.spinner('Scraping jobs... This may take a moment.'):
            jobs = scrape_linkedin_jobs(keyword, location, num_pages)
        if jobs:
            df = pd.DataFrame(jobs)
            st.success(f"Found {len(jobs)} jobs!")
            st.dataframe(df)

            # Offer the scraped listings as a CSV download.
            csv = df.to_csv(index=False)
            st.download_button(
                label="Download CSV",
                data=csv,
                file_name="linkedin_jobs.csv",
                mime="text/csv",
            )
        else:
            st.warning("No jobs found. Try different keywords or location.")
    else:
        st.warning("Please enter both keyword and location.")

st.markdown("---")
st.markdown("Note: This scraper is for educational purposes only. Please respect LinkedIn's terms of service.")