AmnaHassan committed on
Commit
f2cc343
·
verified ·
1 Parent(s): 11e7435

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -48
app.py CHANGED
@@ -1,53 +1,61 @@
1
  import streamlit as st
2
  import requests
3
  from bs4 import BeautifulSoup
4
-
5
- # Function to scrape LinkedIn job titles
6
- def scrape_linkedin_jobs(keyword):
7
- # Construct the URL
8
- url = f'https://www.linkedin.com/jobs/search/?keywords={keyword}'
9
-
10
- # Define headers to mimic a web browser
11
- headers = {
12
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
13
- 'Accept-Language': 'en-US,en;q=0.9',
14
- 'Accept-Encoding': 'gzip, deflate, br',
15
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
16
- 'Connection': 'keep-alive',
17
- }
18
-
19
- # Make the request to the URL
20
- response = requests.get(url, headers=headers)
21
-
22
- # Check if the request was successful
23
- if response.status_code == 200:
24
- # Parse the HTML content
25
  soup = BeautifulSoup(response.content, 'html.parser')
26
-
27
- # Debug: Show the raw HTML content
28
- st.code(soup.prettify()[:1000]) # Display first 1000 characters for inspection
29
-
30
- # Example: Extract job titles
31
- job_titles = soup.find_all('span', class_='screen-reader-text')
32
- return [title.get_text().strip() for title in job_titles if 'Apply' not in title.get_text()]
33
- else:
34
- return f'Failed to retrieve content: {response.status_code}'
35
-
36
- # Streamlit app
37
- st.title('LinkedIn Job Scraper')
38
-
39
- # Input for job keyword
40
- keyword = st.text_input('Enter job keyword', '')
41
-
42
- if st.button('Scrape Jobs'):
43
- if keyword:
44
- with st.spinner('Scraping job titles...'):
45
- job_titles = scrape_linkedin_jobs(keyword)
46
- if isinstance(job_titles, list):
47
- st.write(f'Found {len(job_titles)} job titles:')
48
- for title in job_titles:
49
- st.write(f'- {title}')
50
- else:
51
- st.write(job_titles)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  else:
53
- st.write('Please enter a job keyword.')
 
1
  import streamlit as st
2
  import requests
3
  from bs4 import BeautifulSoup
4
+ import pandas as pd
5
+
6
def scrape_linkedin_jobs(keyword, location, num_pages=1):
    """Scrape LinkedIn public job-search results.

    Args:
        keyword: Job search keyword (title, skill, ...).
        location: Location string to search in.
        num_pages: Number of result pages to fetch; LinkedIn pages
            results in steps of 25.

    Returns:
        A list of dicts with keys 'Title', 'Company', 'Location', 'Link'.
        Pages that fail to load are skipped, so the list may be empty.
    """
    base_url = "https://www.linkedin.com/jobs/search/"
    # Browser-like User-Agent: LinkedIn tends to block the default
    # python-requests UA (the previous revision of this file sent one too).
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/91.0.4472.124 Safari/537.36'
        ),
    }
    jobs = []

    for page in range(num_pages):
        params = {
            "keywords": keyword,
            "location": location,
            "start": page * 25,  # pagination offset, 25 results per page
        }

        # Timeout so a hung connection can't freeze the Streamlit app.
        response = requests.get(base_url, params=params,
                                headers=headers, timeout=10)
        if response.status_code != 200:
            # Best-effort: skip blocked/rate-limited pages instead of
            # parsing an error page (which would just yield no cards).
            continue

        soup = BeautifulSoup(response.content, 'html.parser')
        job_cards = soup.find_all('div', class_='base-card')

        for card in job_cards:
            title = card.find('h3', class_='base-search-card__title')
            company = card.find('h4', class_='base-search-card__subtitle')
            # BUG FIX: the original assigned this to `location`, shadowing
            # the function parameter — every page after the first then sent
            # a bs4 Tag (or None) as the location query param.
            job_location = card.find('span', class_='job-search-card__location')
            link = card.find('a', class_='base-card__full-link')

            # Only keep cards where every field was actually found.
            if title and company and job_location and link:
                jobs.append({
                    'Title': title.text.strip(),
                    'Company': company.text.strip(),
                    'Location': job_location.text.strip(),
                    'Link': link['href'],
                })

    return jobs
37
+
38
# --- Streamlit UI: collect inputs, run the scraper, show/export results ---
st.title("LinkedIn Job Scraper")

keyword = st.text_input("Enter job keyword:")
location = st.text_input("Enter location:")
num_pages = st.number_input("Number of pages to scrape:", min_value=1, max_value=10, value=1)

if st.button("Scrape Jobs"):
    # Guard clause: both fields are required before scraping.
    if not (keyword and location):
        st.warning("Please enter both keyword and location.")
    else:
        jobs = scrape_linkedin_jobs(keyword, location, num_pages)
        if not jobs:
            st.warning("No jobs found. Try different keywords or location.")
        else:
            # Tabular view plus a one-click CSV export of the same data.
            results = pd.DataFrame(jobs)
            st.dataframe(results)
            st.download_button(
                label="Download CSV",
                data=results.to_csv(index=False),
                file_name="linkedin_jobs.csv",
                mime="text/csv",
            )