Andy Lee committed on
Commit
23ee129
·
1 Parent(s): bf35ece

fix: headless mode, do some hacks

Browse files
Files changed (4) hide show
  1. mapcrunch_controller.py +37 -5
  2. pyproject.toml +1 -0
  3. requirements.txt +1 -0
  4. uv.lock +55 -0
mapcrunch_controller.py CHANGED
@@ -1,7 +1,7 @@
1
  import time
2
  from typing import Dict, Optional, List
3
 
4
- from selenium import webdriver
5
  from selenium.webdriver.support.ui import WebDriverWait
6
  from selenium.webdriver.support import expected_conditions as EC
7
  from selenium.webdriver.common.by import By
@@ -11,12 +11,44 @@ from config import MAPCRUNCH_URL, SELECTORS, DATA_COLLECTION_CONFIG
11
 
12
  class MapCrunchController:
13
  def __init__(self, headless: bool = False):
14
- options = webdriver.ChromeOptions()
15
- if headless:
16
- options.add_argument("--headless")
 
17
  options.add_argument("--window-size=1920,1080")
18
- self.driver = webdriver.Chrome(options=options)
 
 
 
 
 
19
  self.wait = WebDriverWait(self.driver, 10)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  self.driver.get(MAPCRUNCH_URL)
21
  time.sleep(3)
22
 
 
1
  import time
2
  from typing import Dict, Optional, List
3
 
4
+ import undetected_chromedriver as uc
5
  from selenium.webdriver.support.ui import WebDriverWait
6
  from selenium.webdriver.support import expected_conditions as EC
7
  from selenium.webdriver.common.by import By
 
11
 
12
  class MapCrunchController:
13
  def __init__(self, headless: bool = False):
14
+ options = uc.ChromeOptions()
15
+ options.add_argument(
16
+ "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
17
+ )
18
  options.add_argument("--window-size=1920,1080")
19
+ options.set_capability("goog:loggingPrefs", {"browser": "ALL"})
20
+
21
+ if headless:
22
+ options.add_argument("--headless=new")
23
+
24
+ self.driver = uc.Chrome(options=options, use_subprocess=True)
25
  self.wait = WebDriverWait(self.driver, 10)
26
+
27
+ # Here we inject a script into the page to disable the site's browser detection.
28
+ # Basically, we set the badBrowser property to 0; the site uses this property to detect whether the browser is being controlled by a script.
29
+ # In the main.min.js, we can see some js code like this:
30
+ # if (badBrowser) {
31
+ # alert("Unsupported browser!");
32
+ # } else {
33
+ # window.panorama = { ... }
34
+ # }
35
+ self.driver.execute_cdp_cmd(
36
+ "Page.addScriptToEvaluateOnNewDocument",
37
+ {
38
+ "source": """
39
+ Object.defineProperty(window, 'badBrowser', {
40
+ value: 0,
41
+ writable: false,
42
+ configurable: false
43
+ });
44
+ window.alert = function() {};
45
+ Object.defineProperty(navigator, 'webdriver', {
46
+ get: () => undefined
47
+ });
48
+ """
49
+ },
50
+ )
51
+
52
  self.driver.get(MAPCRUNCH_URL)
53
  time.sleep(3)
54
 
pyproject.toml CHANGED
@@ -116,6 +116,7 @@ dependencies = [
116
  "tqdm==4.66.5",
117
  "traitlets==5.14.3",
118
  "typing-extensions==4.12.2",
 
119
  "uritemplate==4.1.1",
120
  "urllib3==2.2.3",
121
  "wcwidth==0.2.13",
 
116
  "tqdm==4.66.5",
117
  "traitlets==5.14.3",
118
  "typing-extensions==4.12.2",
119
+ "undetected-chromedriver>=3.5.5",
120
  "uritemplate==4.1.1",
121
  "urllib3==2.2.3",
122
  "wcwidth==0.2.13",
requirements.txt CHANGED
@@ -148,3 +148,4 @@ wcwidth==0.2.13
148
  websocket-client==1.8.0
149
  wsproto==1.2.0
150
  yarl==1.15.5
 
 
148
  websocket-client==1.8.0
149
  wsproto==1.2.0
150
  yarl==1.15.5
151
+ undetected-chromedriver>=3.5.5
uv.lock CHANGED
@@ -2575,6 +2575,7 @@ dependencies = [
2575
  { name = "tqdm" },
2576
  { name = "traitlets" },
2577
  { name = "typing-extensions" },
 
2578
  { name = "uritemplate" },
2579
  { name = "urllib3" },
2580
  { name = "wcwidth" },
@@ -2694,6 +2695,7 @@ requires-dist = [
2694
  { name = "tqdm", specifier = "==4.66.5" },
2695
  { name = "traitlets", specifier = "==5.14.3" },
2696
  { name = "typing-extensions", specifier = "==4.12.2" },
 
2697
  { name = "uritemplate", specifier = "==4.1.1" },
2698
  { name = "urllib3", specifier = "==2.2.3" },
2699
  { name = "wcwidth", specifier = "==0.2.13" },
@@ -2987,6 +2989,17 @@ wheels = [
2987
  { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839 },
2988
  ]
2989
 
 
 
 
 
 
 
 
 
 
 
 
2990
  [[package]]
2991
  name = "uritemplate"
2992
  version = "4.1.1"
@@ -3046,6 +3059,48 @@ wheels = [
3046
  { url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826 },
3047
  ]
3048
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3049
  [[package]]
3050
  name = "wsproto"
3051
  version = "1.2.0"
 
2575
  { name = "tqdm" },
2576
  { name = "traitlets" },
2577
  { name = "typing-extensions" },
2578
+ { name = "undetected-chromedriver" },
2579
  { name = "uritemplate" },
2580
  { name = "urllib3" },
2581
  { name = "wcwidth" },
 
2695
  { name = "tqdm", specifier = "==4.66.5" },
2696
  { name = "traitlets", specifier = "==5.14.3" },
2697
  { name = "typing-extensions", specifier = "==4.12.2" },
2698
+ { name = "undetected-chromedriver", specifier = ">=3.5.5" },
2699
  { name = "uritemplate", specifier = "==4.1.1" },
2700
  { name = "urllib3", specifier = "==2.2.3" },
2701
  { name = "wcwidth", specifier = "==0.2.13" },
 
2989
  { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839 },
2990
  ]
2991
 
2992
+ [[package]]
2993
+ name = "undetected-chromedriver"
2994
+ version = "3.5.5"
2995
+ source = { registry = "https://pypi.org/simple" }
2996
+ dependencies = [
2997
+ { name = "requests" },
2998
+ { name = "selenium" },
2999
+ { name = "websockets" },
3000
+ ]
3001
+ sdist = { url = "https://files.pythonhosted.org/packages/1f/98/7ab46625ce2317756e4e857fe6ac24b6378c3e8f477da26c95226ed8ccb7/undetected-chromedriver-3.5.5.tar.gz", hash = "sha256:9f945e1435005247abe17de316bcfda85b284a4177fd5f25167c78ced33b65ec", size = 65409 }
3002
+
3003
  [[package]]
3004
  name = "uritemplate"
3005
  version = "4.1.1"
 
3059
  { url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826 },
3060
  ]
3061
 
3062
+ [[package]]
3063
+ name = "websockets"
3064
+ version = "15.0.1"
3065
+ source = { registry = "https://pypi.org/simple" }
3066
+ sdist = { url = "https://files.pythonhosted.org/packages/21/e6/26d09fab466b7ca9c7737474c52be4f76a40301b08362eb2dbc19dcc16c1/websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee", size = 177016 }
3067
+ wheels = [
3068
+ { url = "https://files.pythonhosted.org/packages/9f/32/18fcd5919c293a398db67443acd33fde142f283853076049824fc58e6f75/websockets-15.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:823c248b690b2fd9303ba00c4f66cd5e2d8c3ba4aa968b2779be9532a4dad431", size = 175423 },
3069
+ { url = "https://files.pythonhosted.org/packages/76/70/ba1ad96b07869275ef42e2ce21f07a5b0148936688c2baf7e4a1f60d5058/websockets-15.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678999709e68425ae2593acf2e3ebcbcf2e69885a5ee78f9eb80e6e371f1bf57", size = 173082 },
3070
+ { url = "https://files.pythonhosted.org/packages/86/f2/10b55821dd40eb696ce4704a87d57774696f9451108cff0d2824c97e0f97/websockets-15.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d50fd1ee42388dcfb2b3676132c78116490976f1300da28eb629272d5d93e905", size = 173330 },
3071
+ { url = "https://files.pythonhosted.org/packages/a5/90/1c37ae8b8a113d3daf1065222b6af61cc44102da95388ac0018fcb7d93d9/websockets-15.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d99e5546bf73dbad5bf3547174cd6cb8ba7273062a23808ffea025ecb1cf8562", size = 182878 },
3072
+ { url = "https://files.pythonhosted.org/packages/8e/8d/96e8e288b2a41dffafb78e8904ea7367ee4f891dafc2ab8d87e2124cb3d3/websockets-15.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66dd88c918e3287efc22409d426c8f729688d89a0c587c88971a0faa2c2f3792", size = 181883 },
3073
+ { url = "https://files.pythonhosted.org/packages/93/1f/5d6dbf551766308f6f50f8baf8e9860be6182911e8106da7a7f73785f4c4/websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8dd8327c795b3e3f219760fa603dcae1dcc148172290a8ab15158cf85a953413", size = 182252 },
3074
+ { url = "https://files.pythonhosted.org/packages/d4/78/2d4fed9123e6620cbf1706c0de8a1632e1a28e7774d94346d7de1bba2ca3/websockets-15.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8fdc51055e6ff4adeb88d58a11042ec9a5eae317a0a53d12c062c8a8865909e8", size = 182521 },
3075
+ { url = "https://files.pythonhosted.org/packages/e7/3b/66d4c1b444dd1a9823c4a81f50231b921bab54eee2f69e70319b4e21f1ca/websockets-15.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:693f0192126df6c2327cce3baa7c06f2a117575e32ab2308f7f8216c29d9e2e3", size = 181958 },
3076
+ { url = "https://files.pythonhosted.org/packages/08/ff/e9eed2ee5fed6f76fdd6032ca5cd38c57ca9661430bb3d5fb2872dc8703c/websockets-15.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:54479983bd5fb469c38f2f5c7e3a24f9a4e70594cd68cd1fa6b9340dadaff7cf", size = 181918 },
3077
+ { url = "https://files.pythonhosted.org/packages/d8/75/994634a49b7e12532be6a42103597b71098fd25900f7437d6055ed39930a/websockets-15.0.1-cp311-cp311-win32.whl", hash = "sha256:16b6c1b3e57799b9d38427dda63edcbe4926352c47cf88588c0be4ace18dac85", size = 176388 },
3078
+ { url = "https://files.pythonhosted.org/packages/98/93/e36c73f78400a65f5e236cd376713c34182e6663f6889cd45a4a04d8f203/websockets-15.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:27ccee0071a0e75d22cb35849b1db43f2ecd3e161041ac1ee9d2352ddf72f065", size = 176828 },
3079
+ { url = "https://files.pythonhosted.org/packages/51/6b/4545a0d843594f5d0771e86463606a3988b5a09ca5123136f8a76580dd63/websockets-15.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3e90baa811a5d73f3ca0bcbf32064d663ed81318ab225ee4f427ad4e26e5aff3", size = 175437 },
3080
+ { url = "https://files.pythonhosted.org/packages/f4/71/809a0f5f6a06522af902e0f2ea2757f71ead94610010cf570ab5c98e99ed/websockets-15.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:592f1a9fe869c778694f0aa806ba0374e97648ab57936f092fd9d87f8bc03665", size = 173096 },
3081
+ { url = "https://files.pythonhosted.org/packages/3d/69/1a681dd6f02180916f116894181eab8b2e25b31e484c5d0eae637ec01f7c/websockets-15.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0701bc3cfcb9164d04a14b149fd74be7347a530ad3bbf15ab2c678a2cd3dd9a2", size = 173332 },
3082
+ { url = "https://files.pythonhosted.org/packages/a6/02/0073b3952f5bce97eafbb35757f8d0d54812b6174ed8dd952aa08429bcc3/websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8b56bdcdb4505c8078cb6c7157d9811a85790f2f2b3632c7d1462ab5783d215", size = 183152 },
3083
+ { url = "https://files.pythonhosted.org/packages/74/45/c205c8480eafd114b428284840da0b1be9ffd0e4f87338dc95dc6ff961a1/websockets-15.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0af68c55afbd5f07986df82831c7bff04846928ea8d1fd7f30052638788bc9b5", size = 182096 },
3084
+ { url = "https://files.pythonhosted.org/packages/14/8f/aa61f528fba38578ec553c145857a181384c72b98156f858ca5c8e82d9d3/websockets-15.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dee438fed052b52e4f98f76c5790513235efaa1ef7f3f2192c392cd7c91b65", size = 182523 },
3085
+ { url = "https://files.pythonhosted.org/packages/ec/6d/0267396610add5bc0d0d3e77f546d4cd287200804fe02323797de77dbce9/websockets-15.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d5f6b181bb38171a8ad1d6aa58a67a6aa9d4b38d0f8c5f496b9e42561dfc62fe", size = 182790 },
3086
+ { url = "https://files.pythonhosted.org/packages/02/05/c68c5adbf679cf610ae2f74a9b871ae84564462955d991178f95a1ddb7dd/websockets-15.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5d54b09eba2bada6011aea5375542a157637b91029687eb4fdb2dab11059c1b4", size = 182165 },
3087
+ { url = "https://files.pythonhosted.org/packages/29/93/bb672df7b2f5faac89761cb5fa34f5cec45a4026c383a4b5761c6cea5c16/websockets-15.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3be571a8b5afed347da347bfcf27ba12b069d9d7f42cb8c7028b5e98bbb12597", size = 182160 },
3088
+ { url = "https://files.pythonhosted.org/packages/ff/83/de1f7709376dc3ca9b7eeb4b9a07b4526b14876b6d372a4dc62312bebee0/websockets-15.0.1-cp312-cp312-win32.whl", hash = "sha256:c338ffa0520bdb12fbc527265235639fb76e7bc7faafbb93f6ba80d9c06578a9", size = 176395 },
3089
+ { url = "https://files.pythonhosted.org/packages/7d/71/abf2ebc3bbfa40f391ce1428c7168fb20582d0ff57019b69ea20fa698043/websockets-15.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcd5cf9e305d7b8338754470cf69cf81f420459dbae8a3b40cee57417f4614a7", size = 176841 },
3090
+ { url = "https://files.pythonhosted.org/packages/cb/9f/51f0cf64471a9d2b4d0fc6c534f323b664e7095640c34562f5182e5a7195/websockets-15.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931", size = 175440 },
3091
+ { url = "https://files.pythonhosted.org/packages/8a/05/aa116ec9943c718905997412c5989f7ed671bc0188ee2ba89520e8765d7b/websockets-15.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5a939de6b7b4e18ca683218320fc67ea886038265fd1ed30173f5ce3f8e85675", size = 173098 },
3092
+ { url = "https://files.pythonhosted.org/packages/ff/0b/33cef55ff24f2d92924923c99926dcce78e7bd922d649467f0eda8368923/websockets-15.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:746ee8dba912cd6fc889a8147168991d50ed70447bf18bcda7039f7d2e3d9151", size = 173329 },
3093
+ { url = "https://files.pythonhosted.org/packages/31/1d/063b25dcc01faa8fada1469bdf769de3768b7044eac9d41f734fd7b6ad6d/websockets-15.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:595b6c3969023ecf9041b2936ac3827e4623bfa3ccf007575f04c5a6aa318c22", size = 183111 },
3094
+ { url = "https://files.pythonhosted.org/packages/93/53/9a87ee494a51bf63e4ec9241c1ccc4f7c2f45fff85d5bde2ff74fcb68b9e/websockets-15.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c714d2fc58b5ca3e285461a4cc0c9a66bd0e24c5da9911e30158286c9b5be7f", size = 182054 },
3095
+ { url = "https://files.pythonhosted.org/packages/ff/b2/83a6ddf56cdcbad4e3d841fcc55d6ba7d19aeb89c50f24dd7e859ec0805f/websockets-15.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f3c1e2ab208db911594ae5b4f79addeb3501604a165019dd221c0bdcabe4db8", size = 182496 },
3096
+ { url = "https://files.pythonhosted.org/packages/98/41/e7038944ed0abf34c45aa4635ba28136f06052e08fc2168520bb8b25149f/websockets-15.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:229cf1d3ca6c1804400b0a9790dc66528e08a6a1feec0d5040e8b9eb14422375", size = 182829 },
3097
+ { url = "https://files.pythonhosted.org/packages/e0/17/de15b6158680c7623c6ef0db361da965ab25d813ae54fcfeae2e5b9ef910/websockets-15.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:756c56e867a90fb00177d530dca4b097dd753cde348448a1012ed6c5131f8b7d", size = 182217 },
3098
+ { url = "https://files.pythonhosted.org/packages/33/2b/1f168cb6041853eef0362fb9554c3824367c5560cbdaad89ac40f8c2edfc/websockets-15.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4", size = 182195 },
3099
+ { url = "https://files.pythonhosted.org/packages/86/eb/20b6cdf273913d0ad05a6a14aed4b9a85591c18a987a3d47f20fa13dcc47/websockets-15.0.1-cp313-cp313-win32.whl", hash = "sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa", size = 176393 },
3100
+ { url = "https://files.pythonhosted.org/packages/1b/6c/c65773d6cab416a64d191d6ee8a8b1c68a09970ea6909d16965d26bfed1e/websockets-15.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561", size = 176837 },
3101
+ { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743 },
3102
+ ]
3103
+
3104
  [[package]]
3105
  name = "wsproto"
3106
  version = "1.2.0"