from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import pandas as pd
import time


class ScrapingCarmudi:
    """Scrape car names and prices from a paginated listing site with Chrome.

    The scraper opens ``link``, clicks the search button, then walks up to
    ``total_pages`` result pages, collecting the text of every name/price
    element pair. The collected rows are written to ``results.csv`` in the
    current working directory.
    """

    # Locators used on the listing pages.
    SEARCH_BUTTON = (By.XPATH, "//div[contains(@class, 'search-button')]/button")
    CAR_NAME = (By.CLASS_NAME, "js-ellipsize-text")
    CAR_PRICE = (By.CLASS_NAME, "listing__price")
    POPUP_CLOSE = (By.CSS_SELECTOR, ".modal__destroy.b-close.close--menu")
    NEXT_PAGE = (By.XPATH, "//li[contains(@class, 'next')]/a")

    OUTPUT_CSV = 'results.csv'
    CSV_COLUMNS = ('car_names', 'car_prices')

    WAIT_TIMEOUT = 20   # seconds, default explicit-wait timeout
    POPUP_TIMEOUT = 5   # seconds, the popup is optional so wait only briefly
    PAGE_SETTLE = 3     # seconds to sleep after a click that loads new content

    def __init__(self, link: str, total_pages: int = 20):
        """Store the scrape target; no browser is started here.

        Args:
            link: URL of the page to open first.
            total_pages: Maximum number of result pages to scrape.
        """
        self.link = link
        self.total_pages = total_pages
        self.driver = None  # set by webdriver_connect()
        self.wait = None    # WebDriverWait bound to self.driver

    def webdriver_connect(self):
        """Start Chrome and create the default explicit wait.

        Sets ``self.driver`` and ``self.wait``. The chromedriver binary is
        resolved through ``ChromeDriverManager().install()``.
        """
        options = Options()
        # NOTE(review): both flags weaken browser security checks; they are
        # kept as in the original configuration.
        options.add_argument('--ignore-certificate-errors')
        options.add_argument("--disable-web-security")
        self.driver = webdriver.Chrome(
            service=Service(ChromeDriverManager().install()),
            options=options,
        )
        self.wait = WebDriverWait(self.driver, self.WAIT_TIMEOUT)

    def lets_scraping_toweb(self):
        """Run the whole scrape and write the rows to ``results.csv``.

        The browser is always quit once it has been started, including when
        the initial page load or search click fails. Once page scraping has
        begun, the rows gathered so far are written to the CSV even if an
        error interrupts the run; the error is then re-raised.

        Raises:
            TimeoutException: If the search button or the listing elements
                do not appear within the wait timeout.
        """
        self.webdriver_connect()
        rows = []
        try:
            self._open_listing()
            try:
                self._scrape_pages(rows)
            finally:
                # Written in ``finally`` so partial results survive a failure.
                frame = pd.DataFrame(rows, columns=list(self.CSV_COLUMNS))
                frame.to_csv(self.OUTPUT_CSV, index=False)
        finally:
            self.driver.quit()

    def _open_listing(self):
        """Load the start URL and click the search button."""
        self.driver.get(self.link)
        search_button = self.wait.until(
            EC.element_to_be_clickable(self.SEARCH_BUTTON)
        )
        search_button.click()
        time.sleep(self.PAGE_SETTLE)

    def _scrape_pages(self, rows):
        """Append the listings of up to ``total_pages`` pages to ``rows``."""
        for page in range(self.total_pages):
            print(f'Scraping page {page + 1}...')
            rows.extend(self._collect_listings())

            # Nothing left to load after the last requested page.
            if page + 1 >= self.total_pages:
                break

            self._dismiss_popup()
            if not self._go_to_next_page():
                break
            time.sleep(self.PAGE_SETTLE)

    def _collect_listings(self):
        """Return one ``{'car_names', 'car_prices'}`` dict per listing shown."""
        name_elements = self.wait.until(
            EC.presence_of_all_elements_located(self.CAR_NAME)
        )
        price_elements = self.wait.until(
            EC.presence_of_all_elements_located(self.CAR_PRICE)
        )

        listings = []
        # zip() pairs elements by position and stops at the shorter list.
        for name_element, price_element in zip(name_elements, price_elements):
            car_name = name_element.text.strip()
            car_price = price_element.text.strip()
            print(car_name, car_price)
            listings.append({'car_names': car_name, 'car_prices': car_price})
        return listings

    def _dismiss_popup(self):
        """Close the modal popup if one becomes clickable; otherwise do nothing."""
        try:
            close_button = WebDriverWait(self.driver, self.POPUP_TIMEOUT).until(
                EC.element_to_be_clickable(self.POPUP_CLOSE)
            )
        except TimeoutException:
            return  # no popup appeared, which is the normal case
        # Clicked through JavaScript, as in the original implementation.
        self.driver.execute_script("arguments[0].click();", close_button)
        print("Popup modal ditutup.")

    def _go_to_next_page(self):
        """Click the "next" link; return False when it cannot be found."""
        try:
            next_button = self.wait.until(
                EC.element_to_be_clickable(self.NEXT_PAGE)
            )
            self.driver.execute_script("arguments[0].click();", next_button)
        except (TimeoutException, NoSuchElementException):
            print("Tombol 'Selanjutnya' tidak ditemukan, berhenti di halaman ini.")
            return False
        print("Klik tombol Selanjutnya.")
        return True