AmnaHassan committed on
Commit
f2cc343
·
verified ·
1 Parent(s): 11e7435

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -48
app.py CHANGED
@@ -1,53 +1,61 @@
1
  import streamlit as st
2
  import requests
3
  from bs4 import BeautifulSoup
4
-
5
- # Function to scrape LinkedIn job titles
6
- def scrape_linkedin_jobs(keyword):
7
- # Construct the URL
8
- url = f'https://www.linkedin.com/jobs/search/?keywords={keyword}'
9
-
10
- # Define headers to mimic a web browser
11
- headers = {
12
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
13
- 'Accept-Language': 'en-US,en;q=0.9',
14
- 'Accept-Encoding': 'gzip, deflate, br',
15
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
16
- 'Connection': 'keep-alive',
17
- }
18
-
19
- # Make the request to the URL
20
- response = requests.get(url, headers=headers)
21
-
22
- # Check if the request was successful
23
- if response.status_code == 200:
24
- # Parse the HTML content
25
  soup = BeautifulSoup(response.content, 'html.parser')
26
-
27
- # Debug: Show the raw HTML content
28
- st.code(soup.prettify()[:1000]) # Display first 1000 characters for inspection
29
-
30
- # Example: Extract job titles
31
- job_titles = soup.find_all('span', class_='screen-reader-text')
32
- return [title.get_text().strip() for title in job_titles if 'Apply' not in title.get_text()]
33
- else:
34
- return f'Failed to retrieve content: {response.status_code}'
35
-
36
- # Streamlit app
37
- st.title('LinkedIn Job Scraper')
38
-
39
- # Input for job keyword
40
- keyword = st.text_input('Enter job keyword', '')
41
-
42
- if st.button('Scrape Jobs'):
43
- if keyword:
44
- with st.spinner('Scraping job titles...'):
45
- job_titles = scrape_linkedin_jobs(keyword)
46
- if isinstance(job_titles, list):
47
- st.write(f'Found {len(job_titles)} job titles:')
48
- for title in job_titles:
49
- st.write(f'- {title}')
50
- else:
51
- st.write(job_titles)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  else:
53
- st.write('Please enter a job keyword.')
 
1
  import streamlit as st
2
  import requests
3
  from bs4 import BeautifulSoup
4
+ import pandas as pd
5
+
6
def scrape_linkedin_jobs(keyword, location, num_pages=1):
    """Scrape LinkedIn public job-search results.

    Args:
        keyword: Job search keyword (title, skill, ...).
        location: Location string to search in.
        num_pages: Number of result pages to fetch; LinkedIn pages
            results in steps of 25.

    Returns:
        A list of dicts with keys 'Title', 'Company', 'Location', 'Link'.
        Pages that fail to load are skipped, so the list may be empty.
    """
    base_url = "https://www.linkedin.com/jobs/search/"
    # Browser-like User-Agent: LinkedIn tends to block the default
    # python-requests UA (the previous revision of this file sent one too).
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/91.0.4472.124 Safari/537.36'
        ),
    }
    jobs = []

    for page in range(num_pages):
        params = {
            "keywords": keyword,
            "location": location,
            "start": page * 25,  # pagination offset, 25 results per page
        }

        # Timeout so a hung connection can't freeze the Streamlit app.
        response = requests.get(base_url, params=params,
                                headers=headers, timeout=10)
        if response.status_code != 200:
            # Best-effort: skip blocked/rate-limited pages instead of
            # parsing an error page (which would just yield no cards).
            continue

        soup = BeautifulSoup(response.content, 'html.parser')
        job_cards = soup.find_all('div', class_='base-card')

        for card in job_cards:
            title = card.find('h3', class_='base-search-card__title')
            company = card.find('h4', class_='base-search-card__subtitle')
            # BUG FIX: the original assigned this to `location`, shadowing
            # the function parameter — every page after the first then sent
            # a bs4 Tag (or None) as the location query param.
            job_location = card.find('span', class_='job-search-card__location')
            link = card.find('a', class_='base-card__full-link')

            # Only keep cards where every field was actually found.
            if title and company and job_location and link:
                jobs.append({
                    'Title': title.text.strip(),
                    'Company': company.text.strip(),
                    'Location': job_location.text.strip(),
                    'Link': link['href'],
                })

    return jobs
37
+
38
# --- Streamlit UI: collect inputs, run the scraper, show/export results ---
st.title("LinkedIn Job Scraper")

keyword = st.text_input("Enter job keyword:")
location = st.text_input("Enter location:")
num_pages = st.number_input("Number of pages to scrape:", min_value=1, max_value=10, value=1)

if st.button("Scrape Jobs"):
    # Guard clause: both fields are required before scraping.
    if not (keyword and location):
        st.warning("Please enter both keyword and location.")
    else:
        jobs = scrape_linkedin_jobs(keyword, location, num_pages)
        if not jobs:
            st.warning("No jobs found. Try different keywords or location.")
        else:
            # Tabular view plus a one-click CSV export of the same data.
            results = pd.DataFrame(jobs)
            st.dataframe(results)
            st.download_button(
                label="Download CSV",
                data=results.to_csv(index=False),
                file_name="linkedin_jobs.csv",
                mime="text/csv",
            )