# jobs-scraper / app.py
# Author: faridans27 — commit 7e03e1f ("Update app.py")
import time

import pandas as pd
import streamlit as st
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
# Instantiate global variables: accumulator DataFrame that scrapeLinkedin()
# appends one row per scraped job card to.
df = pd.DataFrame(columns=["Title", "Location", "Company", "Link", "Description"])
# Get user input via Streamlit widgets (script re-runs on every interaction).
inputJobTitle = st.text_input("Enter Job Title:")
inputJobLocation = st.text_input("Enter Job Location:")
totalPages = st.number_input("Enter Total Pages:", min_value=1, value=1)  # result pages to fetch (25 listings each)
submit_button = st.button("Submit")  # True only on the run triggered by a click
def scrapeJobDescription(url):
    """Fetch one LinkedIn job posting page and return its description text.

    Launches a headless Chrome instance, loads *url*, and extracts the text
    of the "show-more-less-html__markup" block.

    Returns:
        str: The stripped description text, or "" when the element is absent
        (page did not render, was blocked, or the layout changed).
    """
    options = Options()
    options.add_argument("--window-size=1920,1080")
    options.add_argument("--headless=new")
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        soup = BeautifulSoup(driver.page_source, "html.parser")
        # find() returns None when the element is missing — check explicitly
        # instead of a bare except that would also hide real errors.
        node = soup.find("div", class_="show-more-less-html__markup")
        return node.text.strip() if node is not None else ""
    finally:
        # Always release the browser process; the original leaked one Chrome
        # instance per call because quit() was never invoked.
        driver.quit()
def scrapeLinkedin():
    """Scrape LinkedIn job-search result pages into the global DataFrame ``df``.

    Uses the module-level ``inputJobTitle``, ``inputJobLocation`` and
    ``totalPages`` values. Iterates result pages (LinkedIn paginates via a
    ``start`` offset, 25 listings per page), collecting title, location,
    company, link, and the full description (via ``scrapeJobDescription``)
    for each job card. Stops early when a page fails to load or no result
    list is present (blocked, empty, or layout change).
    """
    global df
    counter = 0        # `start` query offset, advanced by 25 per page
    pageCounter = 1
    options = Options()
    options.add_argument("--window-size=1920,1080")
    options.add_argument("--headless=new")
    driver = webdriver.Chrome(options=options)
    try:
        while pageCounter <= totalPages:
            try:
                driver.get(
                    f"https://www.linkedin.com/jobs/search/?&keywords={inputJobTitle}&location={inputJobLocation}&refresh=true&start={counter}"
                )
            except WebDriverException:
                # Network/driver failure: stop paginating, keep rows so far.
                break
            soup = BeautifulSoup(driver.page_source, "html.parser")
            ulElement = soup.find("ul", class_="jobs-search__results-list")
            if ulElement is None:
                # No result list on this page — nothing more to scrape.
                break
            for item in ulElement.find_all("li"):
                titleEl = item.find("h3", class_="base-search-card__title")
                locationEl = item.find("span", class_="job-search-card__location")
                companyEl = item.find("h4", class_="base-search-card__subtitle")
                linkEls = item.find_all("a")
                # Skip malformed cards instead of crashing on a None .text
                # (the original's bare except aborted the whole scrape here).
                if not (titleEl and locationEl and companyEl and linkEls):
                    continue
                jobTitle = titleEl.text.strip()
                jobLocation = locationEl.text.strip()
                jobCompany = companyEl.text.strip()
                jobLink = linkEls[0]["href"]
                jobDescription = scrapeJobDescription(jobLink)
                if jobTitle and jobLocation and jobCompany and jobLink:
                    df = pd.concat(
                        [
                            df,
                            pd.DataFrame(
                                {
                                    "Title": [jobTitle],
                                    "Location": [jobLocation],
                                    "Company": [jobCompany],
                                    "Link": [jobLink],
                                    "Description": [jobDescription],
                                }
                            ),
                        ]
                    )
            counter += 25
            pageCounter += 1
    finally:
        # Release the browser even if an unexpected error escapes the loop.
        driver.quit()
def convert_df(df):
    """Serialize *df* to CSV (no index column) and return it as UTF-8 bytes.

    Produces the payload handed to ``st.download_button``.
    """
    csv_text = df.to_csv(index=False)
    return csv_text.encode("utf-8")
if submit_button:
    # On click: run the scrape behind a spinner, then render the results
    # table and offer the same data as a downloadable CSV file.
    with st.spinner("Operation in progress. Please wait..."):
        scrapeLinkedin()
        time.sleep(1)
        st.write(df)
        st.download_button(
            "Press to Download",
            convert_df(df),
            "file.csv",
            "text/csv",
            key="download-csv",
        )