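"""Streamlit app that scrapes LinkedIn's public job-search results pages.

For educational use only: scraping may conflict with LinkedIn's terms of
service, and the HTML selectors below can break whenever the markup changes.
"""
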
import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random

def scrape_linkedin_jobs(keyword, location, num_pages=1):
    """Collect job cards from LinkedIn's public search results, one request per page."""
    base_url = "https://www.linkedin.com/jobs/search/"
    jobs = []
    # Browser-like User-Agent; LinkedIn tends to reject the default requests UA.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    for page in range(num_pages):
        # LinkedIn paginates its public search 25 results at a time,
        # addressed via the `start` offset parameter.
        params = {
            "keywords": keyword,
            "location": location,
            "start": page * 25
        }
        
        try:
            # A timeout keeps a stalled connection from hanging the app.
            response = requests.get(base_url, params=params, headers=headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            
            # Matching the full Tailwind-style class string is brittle (any
            # reordering breaks it); the stable `base-search-card` class alone
            # is enough to identify a job card.
            job_cards = soup.find_all('div', class_='base-search-card')
            
            if not job_cards:
                st.warning(f"No job cards found on page {page + 1}. The page structure might have changed.")
                continue

            for card in job_cards:
                title = card.find('h3', class_='base-search-card__title')
                company = card.find('h4', class_='base-search-card__subtitle')
                # Named job_location so the `location` parameter is not
                # shadowed; the original string is reused for later pages.
                job_location = card.find('span', class_='job-search-card__location')
                link = card.find('a', class_='base-card__full-link')

                # Skip cards with any missing field instead of raising.
                if title and company and job_location and link:
                    jobs.append({
                        'Title': title.text.strip(),
                        'Company': company.text.strip(),
                        'Location': job_location.text.strip(),
                        'Link': link['href']
                    })
            
            time.sleep(random.uniform(1, 3))  # Random delay between requests
        
        except requests.RequestException as e:
            st.error(f"An error occurred while fetching page {page + 1}: {str(e)}")
            break

    return jobs

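# Standalone usage sketch (bypassing the Streamlit UI); the values below are
# illustrative placeholders, not part of the app:
#   jobs = scrape_linkedin_jobs("data engineer", "Berlin", num_pages=2)
#   print(f"Scraped {len(jobs)} postings")
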
st.title("LinkedIn Job Scraper")

keyword = st.text_input("Enter job keyword:")
location = st.text_input("Enter location:")
num_pages = st.number_input("Number of pages to scrape:", min_value=1, max_value=10, value=1)

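# Run the scrape on click and render results as a table plus a CSV download.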
if st.button("Scrape Jobs"):
    if keyword and location:
        with st.spinner('Scraping jobs... This may take a moment.'):
            jobs = scrape_linkedin_jobs(keyword, location, num_pages)
        if jobs:
            df = pd.DataFrame(jobs)
            st.success(f"Found {len(jobs)} jobs!")
            st.dataframe(df)
            
            csv = df.to_csv(index=False)
            st.download_button(
                label="Download CSV",
                data=csv,
                file_name="linkedin_jobs.csv",
                mime="text/csv",
            )
        else:
            st.warning("No jobs found. Try a different keyword or location.")
    else:
        st.warning("Please enter both keyword and location.")

st.markdown("---")
st.markdown("Note: This scraper is for educational purposes only. Please respect LinkedIn's terms of service.")