Andy Lee committed on
Commit
f83d6df
·
1 Parent(s): 5558ad1

Merge pull request #1 from yichuan520030910320/mapcrunch

Browse files
Files changed (16) hide show
  1. .DS_Store +0 -0
  2. .gitignore +4 -1
  3. .python-version +1 -0
  4. benchmark.py +227 -0
  5. config.py +92 -0
  6. data_collector.py +498 -0
  7. geo_bot.py +208 -0
  8. geoguessr_bot.py +0 -200
  9. main.py +255 -0
  10. mapcrunch_controller.py +272 -0
  11. pyproject.toml +122 -0
  12. readme.md +2 -4
  13. requirements.txt +0 -0
  14. select_regions.py +0 -66
  15. uv.lock +0 -0
  16. view_data_collect.py +265 -0
.DS_Store DELETED
Binary file (6.15 kB)
 
.gitignore CHANGED
@@ -1,3 +1,6 @@
1
  venv/
2
  .env
3
- __pycache__
 
 
 
 
1
  venv/
2
  .env
3
+ __pycache__
4
+ .DS_Store
5
+ data/
6
+ results/
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.11
benchmark.py ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # benchmark.py (Final Version)
2
+
3
+ import os
4
+ import json
5
+ import time
6
+ from datetime import datetime
7
+ from typing import List, Dict, Optional, Tuple
8
+ from pathlib import Path
9
+ import math
10
+
11
+ from geo_bot import GeoBot
12
+ from config import DATA_PATHS, MODELS_CONFIG, SUCCESS_THRESHOLD_KM
13
+
14
+
15
class MapGuesserBenchmark:
    """Benchmark geolocation models against the stored golden-label samples.

    For every configured model a ``GeoBot`` is opened, each sample is loaded
    through the bot's controller, a screenshot is passed to the model, and the
    predicted coordinates are scored by great-circle distance to the stored
    coordinates.
    """

    def __init__(self, headless: bool = False):
        """Remember the browser mode and load the golden labels from disk."""
        self.headless = headless
        self.golden_labels = self.load_golden_labels()
        print(f"📊 Loaded {len(self.golden_labels)} golden label samples")

    def load_golden_labels(self) -> List[Dict]:
        """Return the ``samples`` list of the golden-labels JSON file.

        Loading is best-effort: any failure (missing file, invalid JSON,
        unexpected top-level structure) yields an empty list, but the reason
        is printed instead of being swallowed silently.
        """
        try:
            with open(DATA_PATHS["golden_labels"], "r", encoding="utf-8") as f:
                return json.load(f).get("samples", [])
        except Exception as e:
            print(f"⚠️ Could not load golden labels: {e}")
            return []

    def get_model_class(self, model_name: str):
        """Resolve a ``MODELS_CONFIG`` key to ``(chat_class, provider_model_name)``.

        The provider packages are imported lazily so that only the package of
        the requested model has to be importable.

        Raises:
            ValueError: if the key or its configured class name is unknown.
        """
        config = MODELS_CONFIG.get(model_name)
        if not config:
            raise ValueError(f"Unknown model: {model_name}")
        class_name, model_class_name = config["class"], config["model_name"]
        if class_name == "ChatOpenAI":
            from langchain_openai import ChatOpenAI

            return ChatOpenAI, model_class_name
        if class_name == "ChatAnthropic":
            from langchain_anthropic import ChatAnthropic

            return ChatAnthropic, model_class_name
        if class_name == "ChatGoogleGenerativeAI":
            from langchain_google_genai import ChatGoogleGenerativeAI

            return ChatGoogleGenerativeAI, model_class_name
        raise ValueError(f"Unknown model class: {class_name}")

    def calculate_distance(
        self, true_coords: Dict, predicted_coords: Optional[Tuple[float, float]]
    ) -> Optional[float]:
        """Return the haversine distance in kilometres between two points.

        Args:
            true_coords: mapping with ``"lat"`` and ``"lng"`` keys (degrees).
            predicted_coords: ``(lat, lon)`` in degrees, or ``None``/empty when
                the model produced no prediction.

        Returns:
            The distance in km, or ``None`` when there is no prediction or the
            inputs are malformed.
        """
        if not predicted_coords:
            return None
        try:
            true_lat, true_lng = true_coords["lat"], true_coords["lng"]
            pred_lat, pred_lng = predicted_coords

            R = 6371  # Earth radius in km used by the original implementation
            lat1, lon1, lat2, lon2 = map(
                math.radians, [true_lat, true_lng, pred_lat, pred_lng]
            )
            dlat = lat2 - lat1
            dlon = lon2 - lon1
            a = (
                math.sin(dlat / 2) ** 2
                + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
            )
            # Rounding can push `a` marginally outside [0, 1] for (near-)
            # antipodal points, which would make sqrt(1 - a) raise.
            a = min(1.0, max(0.0, a))
            c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
            return R * c
        except (TypeError, KeyError, IndexError, ValueError) as e:
            # ValueError covers a prediction tuple of the wrong length.
            print(f"Error in distance calculation: {e}")
            return None

    def run_benchmark(
        self,
        models: Optional[List[str]] = None,
        max_samples: Optional[int] = None,
        **kwargs,
    ) -> Dict:
        """Run every requested model over the golden labels and summarise.

        Args:
            models: ``MODELS_CONFIG`` keys to test; all configured models when
                omitted or empty.
            max_samples: cap on the number of samples (``None`` = all).
            **kwargs: accepted for call compatibility; not used here.

        Returns:
            The per-model summary produced by ``generate_summary``.

        Raises:
            ValueError: if no golden labels are loaded or a model is unknown.
        """
        if not self.golden_labels:
            raise ValueError("No golden labels available.")

        models_to_test = models or list(MODELS_CONFIG.keys())
        test_samples = self.golden_labels[:max_samples]

        print("🚀 Starting LIVE benchmark:")
        print(f" Models: {models_to_test}")
        print(f" Samples: {len(test_samples)}")

        all_results: List[Dict] = []
        try:
            for model_name in models_to_test:
                print(f"\n🤖 Testing model: {model_name}")
                model_class, model_class_name = self.get_model_class(model_name)

                try:
                    with GeoBot(
                        model=model_class,
                        model_name=model_class_name,
                        use_selenium=True,
                        headless=self.headless,
                    ) as bot:
                        for i, sample in enumerate(test_samples):
                            print(f" 📍 Sample {i + 1}/{len(test_samples)}")
                            try:
                                result = self.run_single_test_with_bot(bot, sample)
                                all_results.append(result)

                                status = (
                                    "✅ Success"
                                    if result.get("success")
                                    else "❌ Failed"
                                )
                                distance = result.get("distance_km")
                                dist_str = (
                                    f"{distance:.1f} km"
                                    if distance is not None
                                    else "N/A"
                                )
                                print(f" {status} (Distance: {dist_str})")

                            except KeyboardInterrupt:
                                print("\n⏹️ Benchmark inner loop interrupted.")
                                raise
                            except Exception as e:
                                print(
                                    f" ❌ Test failed with unhandled exception: {e}"
                                )
                                all_results.append(
                                    {
                                        # Same key as successful results so the
                                        # summary groups one model together.
                                        "model": bot.model_name,
                                        # .get(): a malformed sample must not
                                        # raise inside the error handler.
                                        "sample_id": sample.get("id"),
                                        "success": False,
                                        "error": str(e),
                                    }
                                )

                except KeyboardInterrupt:
                    print("\n⏹️ Benchmark outer loop interrupted.")
                    break
        finally:
            # Persist whatever was gathered even if an exception propagates.
            self.save_results(all_results)

        return self.generate_summary(all_results)

    @staticmethod
    def _failure(bot: "GeoBot", location_data: Dict, error: str) -> Dict:
        """Build the result record for a sample that could not be evaluated."""
        return {
            "success": False,
            "error": error,
            "model": bot.model_name,
            "sample_id": location_data["id"],
        }

    def run_single_test_with_bot(self, bot: "GeoBot", location_data: Dict) -> Dict:
        """Evaluate one golden-label sample with an already opened bot.

        Returns a result dict. On failure it carries ``success=False`` and an
        ``error`` message; otherwise it carries the true and predicted
        coordinates, the distance and the elapsed time.
        """
        start_time = time.time()

        # Explicit check instead of `assert`, which is stripped under -O.
        if bot.controller is None:
            return self._failure(bot, location_data, "Bot has no controller")

        if not bot.controller.load_location_from_data(location_data):
            return self._failure(bot, location_data, "Failed to load location")

        screenshot = bot.take_screenshot()
        if not screenshot:
            return self._failure(bot, location_data, "Failed to take screenshot")

        predicted_lat_lon = bot.analyze_image(screenshot)
        # Measured from the start of the test, so it includes loading the
        # location and taking the screenshot, not only the model call.
        inference_time = time.time() - start_time

        true_coords = location_data["coordinates"]
        distance_km = self.calculate_distance(true_coords, predicted_lat_lon)
        is_success = distance_km is not None and distance_km <= SUCCESS_THRESHOLD_KM

        return {
            "sample_id": location_data["id"],
            "model": bot.model_name,
            "true_coordinates": true_coords,
            "predicted_coordinates": predicted_lat_lon,
            "distance_km": distance_km,
            "inference_time": inference_time,
            "success": is_success,
        }

    def save_results(self, results: List[Dict]):
        """Write ``results`` to a timestamped JSON file in the results folder.

        Does nothing for an empty list. Failures are reported, not raised.
        """
        if not results:
            return
        try:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            results_dir = Path(DATA_PATHS["results"])
            results_dir.mkdir(parents=True, exist_ok=True)
            results_file = results_dir / f"benchmark_results_{timestamp}.json"
            output_data = {
                "metadata": {"timestamp": datetime.now().isoformat()},
                "results": results,
            }
            with open(results_file, "w", encoding="utf-8") as f:
                json.dump(output_data, f, indent=2, default=str)
            print(f"💾 Results saved to {results_file}")
        except Exception as e:
            print(f"❌ Error saving results: {e}")

    def generate_summary(self, results: List[Dict]) -> Dict:
        """Aggregate result records into per-model statistics.

        Records are grouped by their ``"model"`` value (``"unknown"`` when
        absent). Distance statistics only consider records that have a
        distance; they are ``None`` when a model has none.
        """
        from statistics import median

        by_model: Dict[str, List[Dict]] = {}
        for r in results:
            by_model.setdefault(r.get("model", "unknown"), []).append(r)

        summary = {}
        for model, model_results in by_model.items():
            # Every group holds at least one record, so the division is safe.
            n_success = sum(1 for r in model_results if r.get("success"))
            distances = [
                r["distance_km"]
                for r in model_results
                if r.get("distance_km") is not None
            ]
            summary[model] = {
                "success_rate": n_success / len(model_results),
                "average_distance_km": sum(distances) / len(distances)
                if distances
                else None,
                # True median: averages the two middle values for an even count.
                "median_distance_km": median(distances) if distances else None,
                "min_distance_km": min(distances) if distances else None,
                "max_distance_km": max(distances) if distances else None,
            }
        return summary
config.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Configuration file for MapCrunch benchmark

# A prediction counts as a success when it lies within this many kilometres.
SUCCESS_THRESHOLD_KM = 100

# MapCrunch settings
MAPCRUNCH_URL = "https://www.mapcrunch.com"

# UI element selectors (CSS)
SELECTORS = {
    "go_button": "#go-button",
    "options_button": "#options-button",
    "stealth_checkbox": "#stealth",
    "urban_checkbox": "#cities",
    "indoor_checkbox": "#inside",
    "tour_checkbox": "#tour",
    "auto_checkbox": "#auto",
    "pano_container": "#pano",
    "map_container": "#map",
    "address_element": "#address",
    "confirm_button": "#confirm-button",  # Will be determined dynamically
    "country_list": "#countrylist",
    "continent_links": "#continents a",
}

# MapCrunch collection options
MAPCRUNCH_OPTIONS = {
    "urban_only": True,  # Show urban areas only
    "exclude_indoor": True,  # Exclude indoor views
    "stealth_mode": True,  # Hide location info during gameplay
    "tour_mode": False,  # 360 degree tour
    "auto_mode": False,  # Automatic slideshow
    # None means all, or a list like ['us', 'gb', 'jp']
    "selected_countries": None,
    # None means all, or a list like [1, 2]  (1=N.America, 2=Europe, etc)
    "selected_continents": None,
}

# Data collection settings
DATA_COLLECTION_CONFIG = {
    "save_thumbnails": True,  # Save small screenshots
    "thumbnail_size": (320, 240),  # Thumbnail dimensions
    # Save full resolution screenshots (storage intensive)
    "save_full_screenshots": False,
    "extract_address": True,  # Extract address/location name
    "wait_after_go": 3,  # Seconds to wait after clicking Go
    "retry_on_failure": True,  # Retry if location fails
    "max_retries": 3,  # Max retries per location
}

# Reference points for coordinate calibration (used in pyautogui coordinate system)
REFERENCE_POINTS = {
    "kodiak": {"lat": 57.7916, "lon": -152.4083},
    "hobart": {"lat": -42.8833, "lon": 147.3355},
}

# Selenium settings
SELENIUM_CONFIG = {
    "headless": False,
    "window_size": (1920, 1080),
    "implicit_wait": 10,
    "page_load_timeout": 30,
}

# Model configurations: benchmark key -> chat class name and provider model id
MODELS_CONFIG = {
    "gpt-4o": {
        "class": "ChatOpenAI",
        "model_name": "gpt-4o",
    },
    "claude-3.5-sonnet": {
        "class": "ChatAnthropic",
        "model_name": "claude-3-5-sonnet-20241022",
    },
    "gemini-1.5-pro": {
        "class": "ChatGoogleGenerativeAI",
        "model_name": "gemini-1.5-pro",
    },
}

# Benchmark settings
BENCHMARK_CONFIG = {
    "rounds_per_model": 50,
    "data_collection_samples": 200,
    "screenshot_delay": 2,
    "click_delay": 1,
}

# Data paths (entries ending in "/" are directories)
DATA_PATHS = {
    "golden_labels": "data/golden_labels.json",
    "screenshots": "data/screenshots/",
    "thumbnails": "data/thumbnails/",
    "results": "results/",
    "screen_regions": "screen_regions.yaml",  # Keep for backward compatibility
}
data_collector.py ADDED
@@ -0,0 +1,498 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import time
4
+ from datetime import datetime
5
+ from typing import List, Dict, Optional
6
+ from pathlib import Path
7
+ import uuid
8
+ from PIL import Image
9
+ from io import BytesIO
10
+
11
+ from mapcrunch_controller import MapCrunchController
12
+ from config import (
13
+ DATA_PATHS,
14
+ BENCHMARK_CONFIG,
15
+ DATA_COLLECTION_CONFIG,
16
+ MAPCRUNCH_OPTIONS,
17
+ )
18
+
19
+
20
class DataCollector:
    """Collect MapCrunch location identifiers, coordinates, and thumbnails.

    Usable as a context manager; leaving the ``with`` block closes the
    underlying ``MapCrunchController``.
    """

    # Collection stops once more than this many attempts fail in a row.
    _MAX_CONSECUTIVE_FAILURES = 5

    def __init__(self, headless: bool = False, options: Optional[Dict] = None):
        """Create the browser controller and the output directories.

        Args:
            headless: forwarded to ``MapCrunchController``.
            options: collection options; ``MAPCRUNCH_OPTIONS`` when omitted
                or empty.
        """
        self.controller = MapCrunchController(headless=headless)
        self.data = []
        # Work on a copy: collect_samples() mutates the options, and that must
        # not leak into the shared module-level defaults or the caller's dict.
        self.options = dict(options) if options else dict(MAPCRUNCH_OPTIONS)
        self.setup_directories()

    def setup_directories(self):
        """Create necessary directories for data storage."""
        for path in DATA_PATHS.values():
            # Entries ending in "/" are directories; others are files whose
            # parent directory has to exist.
            target = Path(path) if path.endswith("/") else Path(path).parent
            target.mkdir(parents=True, exist_ok=True)

    def collect_samples(
        self, num_samples: Optional[int] = None, filter_indoor: Optional[bool] = None
    ) -> List[Dict]:
        """Collect MapCrunch locations with coordinates and thumbnails.

        Args:
            num_samples: number of successful samples wanted; defaults to
                ``BENCHMARK_CONFIG["data_collection_samples"]``.
            filter_indoor: when given, overrides the ``exclude_indoor`` option.

        Returns:
            ``self.data``, which is also written to disk via ``save_data``.
            It can hold fewer samples than requested when collection is
            interrupted or fails too many times in a row.
        """
        if num_samples is None:
            num_samples = BENCHMARK_CONFIG["data_collection_samples"]

        # Override indoor filter if specified
        if filter_indoor is not None:
            self.options["exclude_indoor"] = filter_indoor

        print(f"🚀 Starting location data collection for {num_samples} samples...")
        print(
            f"📍 Options: Urban={self.options.get('urban_only', False)}, Exclude Indoor={self.options.get('exclude_indoor', True)}"
        )

        # Setup MapCrunch options
        if not self.controller.setup_collection_options(self.options):
            print("⚠️ Could not configure all options, continuing anyway...")

        # Setup clean environment for stealth mode if needed
        if self.options.get("stealth_mode", True):
            self.controller.setup_clean_environment()

        successful_samples = 0
        failed_samples = 0
        consecutive_failures = 0

        while successful_samples < num_samples:
            # Checked for every failure path (go button, collection failure,
            # exception) so a persistently broken session cannot loop forever.
            if consecutive_failures > self._MAX_CONSECUTIVE_FAILURES:
                print("❌ Too many consecutive failures, stopping")
                break

            try:
                print(
                    f"\n📍 Collecting location {successful_samples + 1}/{num_samples}"
                )

                # Get new random location
                if not self.controller.click_go_button():
                    print("❌ Failed to get new location")
                    failed_samples += 1
                    consecutive_failures += 1
                    continue

                # Wait for page to load
                time.sleep(DATA_COLLECTION_CONFIG.get("wait_after_go", 5))

                location_data = self._collect_with_retries()

                if location_data:
                    self.data.append(location_data)
                    successful_samples += 1
                    consecutive_failures = 0
                    self._report_sample(successful_samples, location_data)
                else:
                    failed_samples += 1
                    consecutive_failures += 1
                    print("❌ Location collection failed")

                # Brief pause between samples
                time.sleep(0.5)

            except KeyboardInterrupt:
                print(
                    f"\n⏹️ Collection stopped by user after {successful_samples} samples"
                )
                break
            except Exception as e:
                print(f"❌ Error collecting location: {e}")
                failed_samples += 1
                consecutive_failures += 1
                continue

        total_attempts = successful_samples + failed_samples
        # Nothing attempted (e.g. num_samples == 0) must not divide by zero.
        success_rate = (
            successful_samples / total_attempts * 100 if total_attempts else 0.0
        )
        print("\n📊 Collection Summary:")
        print(f"✅ Successful: {successful_samples}")
        print(f"❌ Failed: {failed_samples}")
        print(f"📈 Success rate: {success_rate:.1f}%")

        # Save collected data
        self.save_data()

        return self.data

    def _collect_with_retries(self) -> Optional[Dict]:
        """Call ``collect_single_location`` until it succeeds or retries run out."""
        retries = (
            DATA_COLLECTION_CONFIG.get("max_retries", 3)
            if DATA_COLLECTION_CONFIG.get("retry_on_failure", True)
            else 1
        )
        for retry in range(retries):
            location_data = self.collect_single_location()
            if location_data:
                return location_data
            if retry < retries - 1:
                print(f" ⚠️ Retry {retry + 1}/{retries - 1}")
                time.sleep(1)
        return None

    @staticmethod
    def _report_sample(index: int, location_data: Dict) -> None:
        """Print a one-sample progress report."""
        address = location_data.get("address", "Unknown")
        lat, lng = location_data.get("lat"), location_data.get("lng")
        # `is not None`: 0.0 is a valid latitude/longitude.
        if lat is not None and lng is not None:
            print(f"✅ Location {index}: {address} ({lat:.4f}, {lng:.4f})")
        else:
            print(f"✅ Location {index}: {address}")

        if location_data.get("thumbnail_path"):
            print(f" 📸 Thumbnail saved: {location_data['thumbnail_path']}")

    def collect_single_location(self) -> Optional[Dict]:
        """Collect a single location with all metadata.

        Returns the location record, or ``None`` when coordinates or live
        identifiers are unavailable or any step raises.
        """
        try:
            sample_id = str(uuid.uuid4())
            timestamp = datetime.now().isoformat()

            driver = self.controller.driver
            # Explicit check instead of `assert`, which is stripped under -O.
            if driver is None:
                print("❌ Error in collect_single_location: driver is not available")
                return None

            # 1. Read the live coordinates from the page's panorama object.
            current_coords = driver.execute_script(
                "if (window.panorama) { return { lat: window.panorama.getPosition().lat(), lng: window.panorama.getPosition().lng() }; } else { return null; }"
            )
            if not current_coords or current_coords.get("lat") is None:
                return None

            # 2. Read the live permalink and pano ID.
            live_identifiers = self.controller.get_live_location_identifiers()
            if not live_identifiers or "error" in live_identifiers:
                # live_identifiers may be None/empty here, so guard the lookup.
                reason = (
                    live_identifiers.get("error")
                    if live_identifiers
                    else "no identifiers returned"
                )
                print(f"⚠️ Could not get live identifiers: {reason}")
                return None

            # 3. Read the address.
            address = self.controller.get_current_address()

            # 4. Build the data record.
            location_data = {
                "id": sample_id,
                "timestamp": timestamp,
                "coordinates": current_coords,
                "lat": current_coords.get("lat"),
                "lng": current_coords.get("lng"),
                "address": address or "Unknown",
                "source": "panorama_object",
                # Identifiers taken from the live page state.
                "url": live_identifiers.get("permLink"),
                "perm_link": live_identifiers.get("permLink"),
                "pano_id": live_identifiers.get("panoId"),
                "url_slug": live_identifiers.get("urlString"),
                "collection_options": self.options.copy(),
            }

            if DATA_COLLECTION_CONFIG.get("save_thumbnails", True):
                thumbnail_path = self.save_thumbnail(sample_id)
                location_data["thumbnail_path"] = thumbnail_path
                location_data["has_thumbnail"] = bool(thumbnail_path)

            # Save full screenshot if configured (storage intensive)
            if DATA_COLLECTION_CONFIG.get("save_full_screenshots", False):
                screenshot_path = self.save_full_screenshot(sample_id)
                if screenshot_path:
                    location_data["screenshot_path"] = screenshot_path

            return location_data

        except Exception as e:
            print(f"❌ Error in collect_single_location: {e}")
            return None

    def save_thumbnail(self, sample_id: str) -> Optional[str]:
        """Save a JPEG thumbnail of the current Street View.

        Returns the thumbnail's file name (not the full path), or ``None`` on
        failure.
        """
        try:
            screenshot_bytes = self.controller.take_street_view_screenshot()
            if not screenshot_bytes:
                return None

            image = Image.open(BytesIO(screenshot_bytes))

            # Shrinks in place, keeping the aspect ratio.
            thumbnail_size = DATA_COLLECTION_CONFIG.get("thumbnail_size", (320, 240))
            image.thumbnail(thumbnail_size, Image.Resampling.LANCZOS)

            thumbnail_filename = f"{sample_id}.jpg"
            thumbnail_path = os.path.join(DATA_PATHS["thumbnails"], thumbnail_filename)

            # JPEG has no alpha channel: flatten onto white, using the alpha
            # band (last band of both RGBA and LA) as the paste mask.
            if image.mode in ("RGBA", "LA"):
                rgb_image = Image.new("RGB", image.size, (255, 255, 255))
                rgb_image.paste(image, mask=image.split()[-1])
                image = rgb_image

            image.save(thumbnail_path, "JPEG", quality=85, optimize=True)

            return thumbnail_filename

        except Exception as e:
            print(f"⚠️ Error saving thumbnail: {e}")
            return None

    def save_full_screenshot(self, sample_id: str) -> Optional[str]:
        """Save a full resolution screenshot (optional, storage intensive).

        Returns the screenshot's file name, or ``None`` on failure.
        """
        try:
            screenshot_bytes = self.controller.take_street_view_screenshot()
            if not screenshot_bytes:
                return None

            screenshot_filename = f"{sample_id}.png"
            screenshot_path = os.path.join(
                DATA_PATHS["screenshots"], screenshot_filename
            )

            with open(screenshot_path, "wb") as f:
                f.write(screenshot_bytes)

            return screenshot_filename

        except Exception as e:
            print(f"⚠️ Error saving screenshot: {e}")
            return None

    def save_data(self):
        """Save ``self.data`` plus summary statistics to the golden-labels file.

        Failures are reported, not raised.
        """
        try:
            known_addresses = [
                d["address"]
                for d in self.data
                if d.get("address") and d["address"] != "Unknown"
            ]
            stats = {
                "total_samples": len(self.data),
                "with_coordinates": sum(
                    1 for d in self.data if d.get("lat") is not None
                ),
                "with_address": len(known_addresses),
                "with_thumbnails": sum(
                    1 for d in self.data if d.get("has_thumbnail", False)
                ),
                # Last comma-separated address part is taken as the country;
                # the "Unknown" placeholder is not counted as a country.
                "unique_countries": len(
                    {address.split(", ")[-1] for address in known_addresses}
                ),
            }

            output_data = {
                "metadata": {
                    "collection_date": datetime.now().isoformat(),
                    "total_samples": len(self.data),
                    "statistics": stats,
                    "collection_options": self.options,
                    "version": "3.0",
                    "description": "MapCrunch location data with thumbnails and metadata",
                },
                "samples": self.data,
            }

            with open(DATA_PATHS["golden_labels"], "w", encoding="utf-8") as f:
                json.dump(output_data, f, indent=2)

            print(f"\n💾 Location data saved to {DATA_PATHS['golden_labels']}")
            print("📊 Statistics:")
            for key, value in stats.items():
                print(f" {key}: {value}")

        except Exception as e:
            print(f"❌ Error saving data: {e}")

    def load_existing_data(self) -> List[Dict]:
        """Return the samples stored in the golden-labels file.

        Returns an empty list when the file does not exist or cannot be read.
        """
        try:
            if not os.path.exists(DATA_PATHS["golden_labels"]):
                return []
            with open(DATA_PATHS["golden_labels"], "r", encoding="utf-8") as f:
                return json.load(f).get("samples", [])
        except Exception as e:
            print(f"❌ Error loading existing data: {e}")
            return []

    def validate_sample(self, sample: Dict) -> bool:
        """Return whether a sample has the required fields and usable coordinates."""
        required_fields = ["id", "coordinates"]
        if not all(field in sample for field in required_fields):
            return False

        coords = sample["coordinates"]
        if not isinstance(coords, dict):
            # A null or malformed coordinates entry is invalid, not a crash.
            return False

        if coords.get("lat") is None or coords.get("lng") is None:
            # NOTE(review): the fallback looks for "address" inside the
            # coordinates dict, not on the sample itself — confirm intent.
            if coords.get("address") is None:
                return False

        return True

    def clean_invalid_samples(self):
        """Remove invalid samples from the stored dataset and rewrite it if needed."""
        existing_data = self.load_existing_data()
        valid_samples = [
            sample for sample in existing_data if self.validate_sample(sample)
        ]

        print(
            f"🧹 Cleaned dataset: {len(existing_data)} -> {len(valid_samples)} samples"
        )

        if len(valid_samples) != len(existing_data):
            # Save cleaned data
            self.data = valid_samples
            self.save_data()

    def filter_samples(self, filter_func=None, country=None, has_coordinates=None):
        """Return stored samples that match every given criterion.

        Args:
            filter_func: optional predicate applied to each sample.
            country: case-insensitive substring matched against the address.
            has_coordinates: ``True`` keeps samples with both lat and lng,
                ``False`` keeps those missing either, ``None`` skips the test.
        """
        samples = self.load_existing_data()
        filtered = samples

        if country:
            needle = country.lower()
            # `or ""`: an explicit null address must not break .lower().
            filtered = [
                s for s in filtered if needle in (s.get("address") or "").lower()
            ]

        if has_coordinates is not None:
            filtered = [
                s
                for s in filtered
                if (s.get("lat") is not None and s.get("lng") is not None)
                == bool(has_coordinates)
            ]

        if filter_func:
            filtered = [s for s in filtered if filter_func(s)]

        print(f"🔍 Filtered: {len(samples)} -> {len(filtered)} samples")
        return filtered

    def export_summary(self, output_file: str = "data_summary.txt"):
        """Export a human-readable summary of collected data.

        Samples held in ``self.data`` (e.g. a filtered subset assigned by the
        caller) are exported when present; otherwise the samples stored on
        disk are used.
        """
        samples = self.data if self.data else self.load_existing_data()

        # UTF-8: addresses can contain non-ASCII characters.
        with open(output_file, "w", encoding="utf-8") as f:
            f.write("MapCrunch Data Collection Summary\n")
            f.write("=" * 50 + "\n\n")

            for i, sample in enumerate(samples):
                f.write(f"Sample {i + 1}:\n")
                f.write(f" ID: {sample['id'][:8]}...\n")
                f.write(f" Address: {sample.get('address', 'Unknown')}\n")
                f.write(
                    f" Coordinates: {sample.get('lat', 'N/A')}, {sample.get('lng', 'N/A')}\n"
                )
                f.write(
                    f" Thumbnail: {'Yes' if sample.get('has_thumbnail') else 'No'}\n"
                )
                f.write(f" Collected: {sample.get('timestamp', 'Unknown')}\n")
                f.write("-" * 30 + "\n")

        print(f"📄 Summary exported to {output_file}")

    def close(self):
        """Clean up resources."""
        self.controller.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
426
+
427
def main():
    """Command-line entry point: clean, summarise, or collect location data."""
    import argparse

    cli = argparse.ArgumentParser(
        description="Collect MapCrunch location data for benchmark"
    )
    cli.add_argument(
        "--samples", type=int, default=50, help="Number of locations to collect"
    )
    # Boolean switches, registered in the order they appear in --help.
    for flag, help_text in (
        ("--headless", "Run browser in headless mode"),
        ("--clean", "Clean invalid samples from existing data"),
        ("--urban", "Collect only urban locations"),
        ("--no-indoor", "Exclude indoor views"),
    ):
        cli.add_argument(flag, action="store_true", help=help_text)
    cli.add_argument(
        "--countries",
        nargs="+",
        help="Specific countries to collect from (e.g., us gb jp)",
    )
    cli.add_argument(
        "--export-summary", action="store_true", help="Export summary of collected data"
    )
    cli.add_argument(
        "--filter-country", help="Filter samples by country when exporting"
    )

    args = cli.parse_args()

    # Maintenance mode: drop invalid samples from the stored dataset.
    if args.clean:
        print("🧹 Cleaning existing dataset...")
        with DataCollector(headless=True) as collector:
            collector.clean_invalid_samples()
        return

    # Reporting mode: write a text summary, optionally for one country.
    if args.export_summary:
        print("📄 Exporting data summary...")
        with DataCollector(headless=True) as collector:
            if not args.filter_country:
                collector.export_summary()
            else:
                collector.data = collector.filter_samples(country=args.filter_country)
                collector.export_summary(f"data_summary_{args.filter_country}.txt")
        return

    # Collection mode: start from a copy of the defaults, apply CLI overrides.
    options = MAPCRUNCH_OPTIONS.copy()
    overrides = {}
    if args.urban:
        overrides["urban_only"] = True
    if args.no_indoor:
        overrides["exclude_indoor"] = True
    if args.countries:
        overrides["selected_countries"] = args.countries
    options.update(overrides)

    with DataCollector(headless=args.headless, options=options) as collector:
        collected = collector.collect_samples(args.samples)
        print(f"\n🎉 Collection complete! Collected {len(collected)} location samples.")
        print("📊 Ready for benchmark testing with these locations.")


if __name__ == "__main__":
    main()
geo_bot.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # geo_bot.py (Final Version)
2
+
3
+ from io import BytesIO
4
+ import os
5
+ import dotenv
6
+ import base64
7
+ import re # 导入 re 模块
8
+ from typing import Tuple, List, Optional
9
+ from PIL import Image
10
+
11
+ from langchain_core.messages import HumanMessage, BaseMessage
12
+ from langchain_openai import ChatOpenAI
13
+ from langchain_anthropic import ChatAnthropic
14
+ from langchain_google_genai import ChatGoogleGenerativeAI
15
+
16
+ from mapcrunch_controller import MapCrunchController
17
+ from config import REFERENCE_POINTS
18
+
19
+ dotenv.load_dotenv()
20
+
21
+ PROMPT_INSTRUCTIONS = """
22
+ Try to predict where the image was taken.
23
+ First describe the relevant details in the image to do it.
24
+ List some regions and places where it could be.
25
+ Choose the most likely Country and City or Specific Location.
26
+ At the end, in the last line apart from the previous reasoning, write the Latitude and Longitude from that guessed location
27
+ using the following format, making sure that the coords are valid floats, without anything else and making sure to be consistent with the format:
28
+ Lat: XX.XXXX, Lon: XX.XXXX
29
+ """
30
+
31
+
32
+ class GeoBot:
33
+ prompt_instructions: str = PROMPT_INSTRUCTIONS
34
+
35
+ def __init__(
36
+ self, model=ChatOpenAI, model_name="gpt-4o", use_selenium=True, headless=False
37
+ ):
38
+ self.model = model(model=model_name)
39
+ self.model_name = model_name
40
+ self.use_selenium = use_selenium
41
+ self.controller = (
42
+ MapCrunchController(headless=headless) if use_selenium else None
43
+ )
44
+
45
+ # Get screen and map regions
46
+ if use_selenium:
47
+ self._setup_screen_regions()
48
+ else:
49
+ # Fallback to manual regions (backward compatibility)
50
+ self._load_manual_regions()
51
+
52
+ # Reference points for coordinate calibration
53
+ self.kodiak_lat, self.kodiak_lon = (
54
+ REFERENCE_POINTS["kodiak"]["lat"],
55
+ REFERENCE_POINTS["kodiak"]["lon"],
56
+ )
57
+ self.hobart_lat, self.hobart_lon = (
58
+ REFERENCE_POINTS["hobart"]["lat"],
59
+ REFERENCE_POINTS["hobart"]["lon"],
60
+ )
61
+
62
def _setup_screen_regions(self):
    """Derive the map and screen rectangles from the live browser window.

    Reads the map element rectangle and the window size from the controller,
    then places the two calibration markers at fixed fractions of the map
    rectangle. If anything raises, a warning is printed and
    ``_load_manual_regions`` is used instead.
    """
    try:
        info = self.controller.get_map_element_info()

        # Browser coordinates are used directly as screen coordinates.
        for attr, key in (
            ("map_x", "x"),
            ("map_y", "y"),
            ("map_w", "width"),
            ("map_h", "height"),
        ):
            setattr(self, attr, info[key])

        # The capture region is the whole window, anchored at the origin.
        size = self.controller.driver.get_window_size()
        self.screen_x = self.screen_y = 0
        self.screen_w = size["width"]
        self.screen_h = size["height"]

        # Approximate marker positions; these would need calibrating for
        # MapCrunch's specific map projection.
        left, top = self.map_x, self.map_y
        width, height = self.map_w, self.map_h
        self.kodiak_x = left + int(width * 0.1)
        self.kodiak_y = top + int(height * 0.2)
        self.hobart_x = left + int(width * 0.9)
        self.hobart_y = top + int(height * 0.8)

        print(
            f"📍 Screen regions setup: Map({self.map_x},{self.map_y},{self.map_w},{self.map_h})"
        )

    except Exception as e:
        print(f"⚠️ Warning: Could not setup screen regions via Selenium: {e}")
        self._load_manual_regions()
94
+
95
def _load_manual_regions(self):
    """Load screen, map and marker pixel positions from ``screen_regions.yaml``.

    Raises:
        FileNotFoundError: If the YAML file does not exist; a message is
            printed before the error is re-raised.
    """
    import yaml

    try:
        with open("screen_regions.yaml") as f:
            regions = yaml.safe_load(f)
    except FileNotFoundError:
        print("❌ No screen_regions.yaml found and Selenium setup failed")
        raise

    # Widths and heights are the bottom-right corner minus the top-left one.
    self.screen_x, self.screen_y = regions["screen_top_left"]
    screen_far = regions["screen_bot_right"]
    self.screen_w = screen_far[0] - self.screen_x
    self.screen_h = screen_far[1] - self.screen_y

    self.map_x, self.map_y = regions["map_top_left_1"]
    map_far = regions["map_bot_right_1"]
    self.map_w = map_far[0] - self.map_x
    self.map_h = map_far[1] - self.map_y

    self.kodiak_x, self.kodiak_y = regions["kodiak_1"]
    self.hobart_x, self.hobart_y = regions["hobart_1"]
117
+
118
+ @staticmethod
119
+ def pil_to_base64(image: Image) -> str:
120
+ buffered = BytesIO()
121
+ image.save(buffered, format="PNG")
122
+ return base64.b64encode(buffered.getvalue()).decode("utf-8")
123
+
124
@classmethod
def create_message(cls, images_data: List[str]) -> HumanMessage:
    """Build one message holding the prompt text followed by every image.

    Args:
        images_data: Base64-encoded PNG payloads, one per image.

    Returns:
        A ``HumanMessage`` whose content is the class prompt and one
        ``image_url`` entry (a PNG data URL) per input item, in order.
    """
    image_parts = [
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/png;base64,{encoded}"},
        }
        for encoded in images_data
    ]
    prompt_part = {"type": "text", "text": cls.prompt_instructions}
    return HumanMessage(content=[prompt_part, *image_parts])
135
+
136
def extract_lat_lon_from_response(
    self, response: BaseMessage
) -> Optional[Tuple[float, float]]:
    """Parse the final "Lat: ..., Lon: ..." line of a model reply.

    The reply is scanned from the bottom for the first line that mentions
    both "lat" and "lon" (case-insensitive). The first two numbers on that
    line are taken as latitude and longitude.

    Args:
        response: Message whose ``content`` holds the model's text.

    Returns:
        ``(lat, lon)`` as floats, or ``None`` when no coordinate line is
        found, fewer than two numbers are present, the values fall outside
        [-90, 90] / [-180, 180], or parsing raises.
    """
    # Bound before the try block so the error handler can always print it.
    content = ""
    try:
        raw = response.content
        content = (raw if isinstance(raw, str) else str(raw)).strip()

        coord_line = next(
            (
                line
                for line in reversed(content.split("\n"))
                if "lat" in line.lower() and "lon" in line.lower()
            ),
            "",
        )
        if not coord_line:
            print("❌ No coordinate line found in response.")
            return None

        print(f"🎯 {self.model_name} Prediction: {coord_line}")

        # The sign must be optional on integers as well as decimals; the
        # previous pattern only allowed it on decimals, so "Lat: -33" was
        # read as +33.
        numbers = re.findall(r"[-+]?(?:\d+\.\d*|\.\d+|\d+)", coord_line)

        if len(numbers) < 2:
            print(
                f"❌ Could not find two numbers for lat/lon in line: '{coord_line}'"
            )
            return None

        lat, lon = float(numbers[0]), float(numbers[1])

        if not (-90 <= lat <= 90 and -180 <= lon <= 180):
            print(f"❌ Invalid coordinates extracted: Lat {lat}, Lon {lon}")
            return None

        return lat, lon

    except Exception as e:
        print(
            f"❌ Error parsing lat/lon from response: {e}\nFull response was:\n{content}"
        )
        return None
175
+
176
def take_screenshot(self) -> Optional[Image.Image]:
    """Return the controller's Street View screenshot as an image, or ``None``.

    ``None`` is returned when Selenium is disabled, no controller exists, or
    the controller yields no screenshot bytes.
    """
    if not (self.use_selenium and self.controller):
        return None

    png_bytes = self.controller.take_street_view_screenshot()
    if not png_bytes:
        return None
    return Image.open(BytesIO(png_bytes))
182
+
183
def analyze_image(self, image: Image) -> Optional[Tuple[float, float]]:
    """Send ``image`` to the model and return the parsed ``(lat, lon)`` guess.

    The model's full reply is printed. Any exception is reported on stdout
    and results in ``None``.
    """
    try:
        encoded = self.pil_to_base64(image)
        reply = self.model.invoke([self.create_message([encoded])])

        print(f"\n🤖 Full response from {self.model_name}:")
        print(reply.content)

        # Return the (lat, lon) tuple directly.
        return self.extract_lat_lon_from_response(reply)

    except Exception as e:
        print(f"❌ Error in analyze_image: {e}")
        return None
199
+
200
def close(self):
    """Close the controller if one was created."""
    controller = self.controller
    if not controller:
        return
    controller.close()
203
+
204
def __enter__(self):
    """Enter a ``with`` block; the bot itself is the managed object."""
    return self
206
+
207
def __exit__(self, exc_type, exc_val, exc_tb):
    """Leave a ``with`` block by calling ``close``; exceptions are not suppressed."""
    self.close()
geoguessr_bot.py DELETED
@@ -1,200 +0,0 @@
1
- from io import BytesIO
2
- import os
3
- import dotenv
4
- import base64
5
- import pyautogui
6
- import matplotlib.pyplot as plt
7
- import math
8
- from time import time, sleep
9
- from typing import Tuple, List
10
- from PIL import Image
11
-
12
- from langchain_core.messages import HumanMessage, BaseMessage
13
- from langchain_openai import ChatOpenAI
14
- from langchain_anthropic import ChatAnthropic
15
- from langchain_google_genai import ChatGoogleGenerativeAI
16
-
17
- dotenv.load_dotenv()
18
-
19
-
20
- PROMPT_INSTRUCTIONS = """
21
- Try to predict where the image was taken.
22
- First describe the relevant details in the image to do it.
23
- List some regions and places where it could be.
24
- Chose the most likely Country and City or Specific Location.
25
- At the end, in the last line a part from the previous reasoning, write the Latitude and Longitude from that guessed location
26
- using the following format, making sure that the coords are valid floats, without anything else and making sure to be consistent with the format:
27
- Lat: XX.XXXX, Lon: XX.XXXX
28
- """
29
-
30
-
31
- class GeoBot:
32
- prompt_instructions: str = PROMPT_INSTRUCTIONS
33
-
34
- def __init__(self, screen_regions, player=1, model=ChatOpenAI, model_name="gpt-4o"):
35
- self.player = player
36
- self.screen_regions = screen_regions
37
- self.screen_x, self.screen_y = screen_regions["screen_top_left"]
38
- self.screen_w = screen_regions["screen_bot_right"][0] - self.screen_x
39
- self.screen_h = screen_regions["screen_bot_right"][1] - self.screen_y
40
- self.screen_xywh = (self.screen_x, self.screen_y, self.screen_w, self.screen_h)
41
-
42
- self.map_x, self.map_y = screen_regions[f"map_top_left_{player}"]
43
- self.map_w = screen_regions[f"map_bot_right_{player}"][0] - self.map_x
44
- self.map_h = screen_regions[f"map_bot_right_{player}"][1] - self.map_y
45
- self.minimap_xywh = (self.map_x, self.map_y, self.map_w, self.map_h)
46
-
47
- self.next_round_button = screen_regions["next_round_button"] if player==1 else None
48
- self.confirm_button = screen_regions[f"confirm_button_{player}"]
49
-
50
- self.kodiak_x, self.kodiak_y = screen_regions[f"kodiak_{player}"]
51
- self.hobart_x, self.hobart_y = screen_regions[f"hobart_{player}"]
52
-
53
- # Refernece points to calibrate the minimap everytime
54
- self.kodiak_lat, self.kodiak_lon = (57.7916, -152.4083)
55
- self.hobart_lat, self.hobart_lon = (-42.8833, 147.3355)
56
-
57
- self.model = model(model=model_name)
58
-
59
-
60
- @staticmethod
61
- def pil_to_base64(image: Image) -> str:
62
- buffered = BytesIO()
63
- image.save(buffered, format="PNG")
64
- img_base64_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
65
-
66
- return img_base64_str
67
-
68
-
69
- @classmethod
70
- def create_message(cls, images_data: List[str]) -> HumanMessage:
71
- message = HumanMessage(
72
- content=[
73
- {
74
- "type": "text",
75
- "text": cls.prompt_instructions,
76
- },
77
- ] + [
78
- {
79
- "type": "image_url",
80
- "image_url": {"url": f"data:image/png;base64,{img_data}"},
81
- }
82
- for img_data in images_data],
83
- )
84
-
85
- return message
86
-
87
-
88
- def extract_location_from_response(self, response: BaseMessage) -> Tuple[float, float]:
89
- try:
90
- response = response.content.split("\n")
91
- while response and len(response[-1]) == 0 and "lat" not in response[-1].lower():
92
- response.pop()
93
- if response:
94
- prediction = response[-1]
95
- else:
96
- return None
97
- print(f"\n-------\n{self.model} Prediction:\n", prediction)
98
-
99
- # Lat: 57.7916, Lon: -152.4083
100
- lat = float(prediction.split(",")[0].split(":")[1])
101
- lon = float(prediction.split(",")[1].split(":")[1])
102
-
103
- x, y = self.lat_lon_to_mercator_map_pixels(lat, lon)
104
- print(f"Normalized pixel coordinates: ({x}, {y})")
105
-
106
- if x < self.map_x:
107
- x = self.map_x
108
- print("x out of bounds")
109
- elif x > self.map_x+self.map_w:
110
- x = self.map_x+self.map_w
111
- print("x out of bounds")
112
- if y < self.map_y:
113
- y = self.map_y
114
- print("y out of bounds")
115
- elif y > self.map_y+self.map_h:
116
- y = self.map_y+self.map_h
117
- print("y out of bounds")
118
-
119
- return x, y
120
-
121
- except Exception as e:
122
- print("Error:", e)
123
- return None
124
-
125
-
126
- @staticmethod
127
- def lat_to_mercator_y(lat: float) -> float:
128
- return math.log(math.tan(math.pi / 4 + math.radians(lat) / 2))
129
-
130
-
131
- def lat_lon_to_mercator_map_pixels(self, lat: float, lon: float) -> Tuple[int, int]:
132
- """
133
- Convert latitude and longitude to pixel coordinates on the mercator projection minimap,
134
- taking two known points 1 and 2 as a reference.
135
-
136
- Args:
137
- lat (float): Latitude (Decimal Degrees) of the point to convert.
138
- lon (float): Longitude (Decimal Degrees) of the point to convert.
139
-
140
- Returns:
141
- tuple: x, y pixel coordinates of the point.
142
- """
143
-
144
- # Calculate the x pixel coordinate
145
- lon_diff_ref = (self.kodiak_lon - self.hobart_lon)
146
- lon_diff = (self.kodiak_lon - lon)
147
-
148
- x = abs(self.kodiak_x - self.hobart_x) * (lon_diff / lon_diff_ref) + self.kodiak_x
149
-
150
- # Convert latitude and longitude to mercator projection y coordinates
151
- mercator_y1 = self.lat_to_mercator_y(self.kodiak_lat)
152
- mercator_y2 = self.lat_to_mercator_y(self.hobart_lat)
153
- mercator_y = self.lat_to_mercator_y(lat)
154
-
155
- # Calculate the y pixel coordinate
156
- lat_diff_ref = (mercator_y1 - mercator_y2)
157
- lat_diff = (mercator_y1 - mercator_y)
158
-
159
- y = abs(self.kodiak_y - self.hobart_y) * (lat_diff / lat_diff_ref) + self.kodiak_y
160
-
161
- return round(x), round(y)
162
-
163
-
164
- def select_map_location(self, x: int, y: int, plot: bool = False) -> None:
165
- # Hovering over the minimap to expand it
166
- pyautogui.moveTo(self.map_x+self.map_w-15, self.map_y+self.map_h-15, duration=0.5)
167
- #bot.screen_w-50, bot.screen_h-80
168
- # pyautogui.moveTo(self.screen_w-50, self.screen_h-80, duration=1.5)
169
- # print(self.screen_w-50, self.screen_h-80)
170
- print('finish moving')
171
- sleep(0.5)
172
-
173
- # Clicking on the predicted location
174
- pyautogui.click(x, y, duration=0.5)
175
- print('finish clicking')
176
- sleep(0.5)
177
-
178
- if plot:
179
- self.plot_minimap(x, y)
180
-
181
- # Confirming the guessed location
182
- pyautogui.click(self.confirm_button, duration=0.2)
183
- sleep(2)
184
-
185
-
186
- def plot_minimap(self, x: int = None, y: int = None) -> None:
187
- minimap = pyautogui.screenshot(region=self.minimap_xywh)
188
- plot_kodiak_x = self.kodiak_x - self.map_x
189
- plot_kodiak_y = self.kodiak_y - self.map_y
190
- plot_hobart_x = self.hobart_x - self.map_x
191
- plot_hobart_y = self.hobart_y - self.map_y
192
- plt.imshow(minimap)
193
- plt.plot(plot_hobart_x, plot_hobart_y, 'ro')
194
- plt.plot(plot_kodiak_x, plot_kodiak_y, 'ro')
195
- if x and y:
196
- plt.plot(x-self.map_x, y-self.map_y, 'bo')
197
-
198
- os.makedirs("plots", exist_ok=True)
199
- plt.savefig("plots/minimap.png")
200
- # plt.show()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
main.py ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Main entry point for MapCrunch geo-location testing
4
+
5
+ Usage:
6
+ python main.py --mode data --samples 50 --urban --no-indoor # Collect filtered data
7
+ python main.py --mode benchmark --models gpt-4o claude-3.5-sonnet # Run benchmark
8
+ python main.py --mode interactive --model gpt-4o # Interactive testing
9
+ """
10
+
11
+ import argparse
12
+ import os
13
+ from time import sleep
14
+ from typing import Dict
15
+
16
+ from langchain_openai import ChatOpenAI
17
+ from langchain_anthropic import ChatAnthropic
18
+ from langchain_google_genai import ChatGoogleGenerativeAI
19
+
20
+ from geo_bot import GeoBot
21
+ from data_collector import DataCollector
22
+ from benchmark import MapGuesserBenchmark
23
+ from config import MODELS_CONFIG, SUCCESS_THRESHOLD_KM
24
+
25
+
26
def interactive_mode(model_name: str = "gpt-4o", turns: int = 5, plot: bool = False):
    """Run live rounds: load a new location, screenshot it, and print the guess.

    Args:
        model_name: Key into ``MODELS_CONFIG`` selecting the model to use.
        turns: Number of rounds to play.
        plot: Accepted for backward compatibility and currently unused.
            ``GeoBot`` no longer exposes ``select_map_location``, so there is
            no minimap click to plot; the predicted coordinates are printed
            instead.
    """
    print(f"🎮 Starting interactive mode with {model_name}")

    # Get model class
    config = MODELS_CONFIG.get(model_name)
    if not config:
        print(f"❌ Unknown model: {model_name}")
        return

    model_class_name = config["class"]
    model_class = globals().get(model_class_name)
    if model_class is None:
        print(f"❌ Model class not available: {model_class_name}")
        return
    model_instance = config["model_name"]

    # Create bot with Selenium integration
    with GeoBot(model=model_class, model_name=model_instance, use_selenium=True) as bot:
        # Setup clean environment
        if bot.controller:
            bot.controller.setup_clean_environment()

        for turn in range(turns):
            print(f"\n{'=' * 50}")
            print(f"🎯 Turn {turn + 1}/{turns}")
            print(f"{'=' * 50}")

            try:
                # Get new location (click Go button)
                if bot.controller:
                    if not bot.controller.click_go_button():
                        print("❌ Failed to get new location")
                        continue
                else:
                    print("⚠️ Manual mode: Please click Go button and press Enter")
                    input()

                # Take screenshot and analyze
                screenshot = bot.take_screenshot()
                if screenshot is None:
                    print("❌ Could not capture a screenshot")
                    continue

                location = bot.analyze_image(screenshot)

                if location is not None:
                    # analyze_image returns (lat, lon), not pixel coordinates.
                    lat, lon = location
                    print(f"✅ Predicted location: Lat {lat:.4f}, Lon {lon:.4f}")
                else:
                    print("❌ Could not determine location")

                # Brief pause between turns
                sleep(2)

            except KeyboardInterrupt:
                print(f"\n⏹️ Game stopped by user after {turn + 1} turns")
                break
            except Exception as e:
                print(f"❌ Error in turn {turn + 1}: {e}")
                continue
86
+
87
+
88
def data_collection_mode(
    samples: int = 50, headless: bool = False, options: Dict = None
):
    """Collect ``samples`` locations with a ``DataCollector`` and report the count.

    Args:
        samples: Number of samples requested from the collector.
        headless: Forwarded to ``DataCollector``.
        options: Collection options forwarded to ``DataCollector``; echoed
            to stdout when truthy.
    """
    print(f"📊 Starting data collection mode - {samples} samples")

    if options:
        print(f"🔧 Using custom options: {options}")

    collector_cm = DataCollector(headless=headless, options=options)
    with collector_cm as collector:
        collected = collector.collect_samples(samples)
        print(f"✅ Collected {len(collected)} samples successfully")
100
+
101
+
102
def benchmark_mode(
    models: list = None, samples: int = 10, live: bool = False, headless: bool = False
):
    """Run ``MapGuesserBenchmark`` and print per-model statistics.

    Args:
        models: Model names to benchmark; defaults to ``["gpt-4o"]``.
        samples: Passed to ``run_benchmark`` as ``max_samples``.
        live: Passed to ``run_benchmark`` as ``use_live_mode``.
        headless: Forwarded to ``MapGuesserBenchmark``.

    Any exception raised while running or printing is reported on stdout
    rather than propagated.
    """
    if models is None:
        models = ["gpt-4o"]  # Default model

    mode_label = "live" if live else "offline"
    print(f"🏁 Starting benchmark mode")
    print(f" Models: {models}")
    print(f" Samples per model: {samples}")
    print(f" Mode: {mode_label}")

    benchmark = MapGuesserBenchmark(headless=headless)

    # Label / stats-key pairs for the distance lines, in print order.
    distance_rows = (
        (" 📏 Average Distance", "average_distance_km"),
        (" 📊 Median Distance", "median_distance_km"),
        (" 🎯 Best", "min_distance_km"),
        (" 📈 Worst", "max_distance_km"),
    )

    try:
        summary = benchmark.run_benchmark(
            models=models, max_samples=samples, use_live_mode=live
        )

        print(f"\n🎉 Benchmark Complete!")

        if not summary:
            return

        print(f"\n📊 Results Summary:")
        for model, stats in summary.items():
            print(f"\n🤖 {model}:")
            print(
                f" Success Rate (under {SUCCESS_THRESHOLD_KM}km): {stats.get('success_rate', 0) * 100:.1f}%"
            )
            for label, key in distance_rows:
                print(f"{label}: {stats[key]:.1f} km")

    except Exception as e:
        print(f"❌ Benchmark failed: {e}")
137
+
138
+
139
def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser shared by all three modes."""
    parser = argparse.ArgumentParser(
        description="MapCrunch Geo-Location AI Benchmark",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Collect training data with filters
python main.py --mode data --samples 100 --urban --no-indoor

# Collect from specific countries
python main.py --mode data --samples 50 --countries us gb jp --urban

# Run benchmark on saved data
python main.py --mode benchmark --models gpt-4o claude-3.5-sonnet --samples 20

# Interactive testing
python main.py --mode interactive --model gpt-4o --turns 5 --plot

# Live benchmark (uses MapCrunch website directly)
python main.py --mode benchmark --live --models gpt-4o
""",
    )

    parser.add_argument(
        "--mode",
        choices=["interactive", "data", "benchmark"],
        default="interactive",
        help="Operation mode",
    )

    # Interactive mode options
    parser.add_argument(
        "--model",
        choices=list(MODELS_CONFIG.keys()),
        default="gpt-4o",
        help="Model for interactive mode",
    )
    parser.add_argument(
        "--turns", type=int, default=5, help="Number of turns in interactive mode"
    )
    parser.add_argument(
        "--plot", action="store_true", help="Generate plots of predictions"
    )

    # Data collection options
    parser.add_argument(
        "--samples", type=int, default=50, help="Number of samples to collect/test"
    )
    parser.add_argument(
        "--urban", action="store_true", help="Collect only urban locations"
    )
    parser.add_argument("--no-indoor", action="store_true", help="Exclude indoor views")
    parser.add_argument(
        "--countries",
        nargs="+",
        help="Specific countries to collect from (e.g., us gb jp)",
    )

    # Benchmark options
    parser.add_argument(
        "--models",
        nargs="+",
        choices=list(MODELS_CONFIG.keys()),
        help="Models to benchmark",
    )
    parser.add_argument(
        "--live", action="store_true", help="Use live MapCrunch website for benchmark"
    )

    # General options
    parser.add_argument(
        "--headless", action="store_true", help="Run browser in headless mode"
    )
    return parser


def _collection_options_from_args(args: argparse.Namespace) -> Dict:
    """Copy ``MAPCRUNCH_OPTIONS`` and apply the data-collection flags from ``args``."""
    from config import MAPCRUNCH_OPTIONS

    options = MAPCRUNCH_OPTIONS.copy()

    # Flags only switch options on; unset flags keep the configured defaults.
    if args.urban:
        options["urban_only"] = True
    if args.no_indoor:
        options["exclude_indoor"] = True
    if args.countries:
        options["selected_countries"] = args.countries
    return options


def main():
    """Parse the command line and dispatch to the selected mode.

    ``KeyboardInterrupt`` is reported and swallowed; any other exception is
    printed and re-raised.
    """
    args = _build_parser().parse_args()

    print(f"🚀 MapCrunch Geo-Location AI Benchmark")
    print(f" Mode: {args.mode}")

    try:
        if args.mode == "interactive":
            interactive_mode(model_name=args.model, turns=args.turns, plot=args.plot)

        elif args.mode == "data":
            data_collection_mode(
                samples=args.samples,
                headless=args.headless,
                options=_collection_options_from_args(args),
            )

        elif args.mode == "benchmark":
            benchmark_mode(
                models=args.models,
                samples=args.samples,
                live=args.live,
                headless=args.headless,
            )

    except KeyboardInterrupt:
        print(f"\n⏹️ Operation interrupted by user")
    except Exception as e:
        print(f"❌ Error: {e}")
        raise
252
+
253
+
254
+ if __name__ == "__main__":
255
+ main()
mapcrunch_controller.py ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mapcrunch_controller.py
2
+
3
+ from selenium import webdriver
4
+ from selenium.webdriver.common.by import By
5
+ from selenium.webdriver.support.ui import WebDriverWait
6
+ from selenium.webdriver.support import expected_conditions as EC
7
+ from selenium.webdriver.common.action_chains import ActionChains
8
+ from selenium.webdriver.chrome.options import Options
9
+ import time
10
+ from typing import Dict, Optional, Tuple
11
+ from config import MAPCRUNCH_URL, SELECTORS, SELENIUM_CONFIG
12
+
13
+
14
+ class MapCrunchController:
15
+ """Selenium controller for MapCrunch website automation"""
16
+
17
def __init__(self, headless: bool = False):
    """Record the headless flag and start the browser via ``setup_driver``."""
    self.headless = headless
    # Placeholders; setup_driver() assigns the real driver and wait helper.
    self.driver = self.wait = None
    self.setup_driver()
22
+
23
def setup_driver(self):
    """Start Chrome with the configured window size and open MapCrunch.

    Sets ``self.driver`` and ``self.wait``, loads ``MAPCRUNCH_URL`` and then
    sleeps for three seconds.
    """
    chrome_options = Options()

    flags = ["--headless"] if self.headless else []
    flags.append(
        f"--window-size={SELENIUM_CONFIG['window_size'][0]},{SELENIUM_CONFIG['window_size'][1]}"
    )
    flags.extend(["--disable-gpu", "--no-sandbox", "--disable-dev-shm-usage"])
    for flag in flags:
        chrome_options.add_argument(flag)

    driver = webdriver.Chrome(options=chrome_options)
    self.driver = driver
    driver.set_window_size(*SELENIUM_CONFIG["window_size"])
    self.wait = WebDriverWait(driver, SELENIUM_CONFIG["implicit_wait"])

    driver.get(MAPCRUNCH_URL)
    time.sleep(3)
43
+
44
def setup_clean_environment(self):
    """Hide MapCrunch page chrome so screenshots show only the scene.

    Runs a script that sets ``display: none`` on ``#menu``, ``#info-box``,
    ``#social`` and ``#bottom-box`` when present. Failures (including a
    missing driver) only print a warning.
    """
    hide_script = """
        const elementsToHide = ['#menu', '#info-box', '#social', '#bottom-box'];
        elementsToHide.forEach(sel => {
            const el = document.querySelector(sel);
            if (el) el.style.display = 'none';
        });
    """
    try:
        assert self.driver is not None
        self.driver.execute_script(hide_script)
        print("✅ Environment configured for clean benchmark")
    except Exception as e:
        print(f"⚠️ Warning: Could not fully configure environment: {e}")
58
+
59
def setup_collection_options(self, options: Dict = None):
    """Open the options panel and bring three checkboxes in line with ``options``.

    Args:
        options: Collection settings; ``MAPCRUNCH_OPTIONS`` is used when
            ``None``. Keys read: ``urban_only`` (default False),
            ``exclude_indoor`` (default True), ``stealth_mode`` (default True).

    Returns:
        True when every step succeeded, False if any step raised.
    """
    from config import MAPCRUNCH_OPTIONS

    if options is None:
        options = MAPCRUNCH_OPTIONS

    # (selector key, option key, default, click-when-equal)
    # The indoor checkbox is compared the opposite way to the other two: it is
    # clicked when its state EQUALS the "exclude_indoor" setting.
    checkbox_rules = (
        ("urban_checkbox", "urban_only", False, False),
        ("indoor_checkbox", "exclude_indoor", True, True),
        ("stealth_checkbox", "stealth_mode", True, False),
    )

    try:
        assert self.wait is not None
        options_button = self.wait.until(
            EC.element_to_be_clickable(
                (By.CSS_SELECTOR, SELECTORS["options_button"])
            )
        )
        options_button.click()
        time.sleep(1)

        assert self.driver is not None
        for selector_key, option_key, default, click_when_equal in checkbox_rules:
            checkbox = self.driver.find_element(
                By.CSS_SELECTOR, SELECTORS[selector_key]
            )
            wanted = options.get(option_key, default)
            selected = checkbox.is_selected()
            if click_when_equal:
                needs_click = wanted == selected
            else:
                needs_click = wanted != selected
            if needs_click:
                checkbox.click()

        # Second click on the options button (presumably closes the panel).
        options_button.click()
        time.sleep(0.5)
        print("✅ Collection options configured")
        return True
    except Exception as e:
        print(f"❌ Error configuring options: {e}")
        return False
103
+
104
def _select_countries(self, country_codes: list):
    """Click country links in the options panel so only ``country_codes`` remain.

    First pass clicks every ``#countrylist a`` link whose class does not
    contain "hover"; second pass clicks each requested link whose class does
    contain "hover". NOTE(review): what the "hover" class means on the site is
    not visible here - confirm against the page. Failures only print a warning.
    """
    try:
        assert self.driver is not None

        # First, deselect all
        for link in self.driver.find_elements(By.CSS_SELECTOR, "#countrylist a"):
            css_class = link.get_attribute("class")
            if css_class is None or "hover" in css_class:
                continue
            link.click()
            time.sleep(0.1)

        # Then select desired countries
        for code in country_codes:
            link = self.driver.find_element(
                By.CSS_SELECTOR, f'a[data-code="{code}"]'
            )
            css_class = link.get_attribute("class")
            if css_class is None or "hover" not in css_class:
                continue
            link.click()
            time.sleep(0.1)

        print(f"✅ Selected countries: {country_codes}")

    except Exception as e:
        print(f"⚠️ Warning: Could not select countries: {e}")
130
+
131
def click_go_button(self) -> bool:
    """Click the Go button to load a new Street View location.

    Returns:
        True once the button was clicked and the post-click wait elapsed,
        False if anything raised (the error is printed).
    """
    # DATA_COLLECTION_CONFIG is not among this module's top-level config
    # imports, so referencing it raised NameError after every click and the
    # method always returned False. Import it locally and fall back to the
    # 5-second default if config does not provide it.
    try:
        from config import DATA_COLLECTION_CONFIG

        wait_after_go = DATA_COLLECTION_CONFIG.get("wait_after_go", 5)
    except ImportError:
        wait_after_go = 5

    try:
        assert self.wait is not None
        go_button = self.wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, SELECTORS["go_button"]))
        )
        go_button.click()
        # Important: wait for the page's JavaScript to finish and update the content.
        time.sleep(wait_after_go)
        return True
    except Exception as e:
        print(f"❌ Error clicking Go button: {e}")
        return False
145
+
146
def get_current_address(self) -> Optional[str]:
    """Return the address shown on the page, or "Stealth Mode" on any failure.

    The longer of the element's stripped text and its ``title`` attribute is
    returned; the text wins ties.
    """
    try:
        assert self.wait is not None
        locator = (By.CSS_SELECTOR, SELECTORS["address_element"])
        element = self.wait.until(EC.visibility_of_element_located(locator))

        visible_text = element.text.strip()
        tooltip = element.get_attribute("title") or ""
        if len(tooltip) > len(visible_text):
            return tooltip
        return visible_text
    except Exception:
        # In stealth mode this element may be hidden, so not finding it is normal.
        return "Stealth Mode"
165
+
166
# Re-added: GeoBot's screen-region setup calls get_map_element_info.
def get_map_element_info(self) -> Dict:
    """Return the map container's position and size, or ``{}`` on failure.

    Returns:
        A dict with ``x``, ``y``, ``width``, ``height`` and the located
        ``element``. Any exception yields an empty dict: the benchmark does
        not require this data, only GeoBot initialization does, so the
        method fails quietly.
    """
    try:
        assert self.wait is not None
        locator = (By.CSS_SELECTOR, SELECTORS["map_container"])
        map_element = self.wait.until(EC.presence_of_element_located(locator))

        size, position = map_element.rect, map_element.location
        return {
            "x": position["x"],
            "y": position["y"],
            "width": size["width"],
            "height": size["height"],
            "element": map_element,
        }
    except Exception:
        return {}
189
+
190
def take_street_view_screenshot(self) -> Optional[bytes]:
    """Return a PNG screenshot of the panorama container, or ``None`` on error."""
    try:
        assert self.wait is not None
        locator = (By.CSS_SELECTOR, SELECTORS["pano_container"])
        pano_element = self.wait.until(EC.presence_of_element_located(locator))
        png_bytes = pano_element.screenshot_as_png
    except Exception as e:
        print(f"❌ Error taking screenshot: {e}")
        return None
    return png_bytes
203
+
204
# Added: reads the identifiers of the page as it is currently displayed.
def get_live_location_identifiers(self) -> Dict:
    """Run page JavaScript and return identifiers of the location now on screen.

    Returns:
        Whatever the script returns: ``permLink``, ``panoId`` and
        ``urlString`` on success, or ``{"error": ...}`` if the page-side
        calls throw. If running the script itself raises, the error is
        printed and ``{}`` is returned.
    """
    # The script calls the site's own JS functions to get the live link.
    identifiers_script = """
        try {
            return {
                permLink: getPermLink(), // 调用网站自己的函数
                panoId: window.panorama.getPano(),
                urlString: urlSlug() // 调用网站自己的函数
            };
        } catch (e) {
            return { error: e.toString() };
        }
    """
    try:
        assert self.driver is not None
        return self.driver.execute_script(identifiers_script)
    except Exception as e:
        print(f"❌ Error getting live identifiers: {e}")
        return {}
225
+
226
# Changed: load_location_from_data can also rebuild a link from coordinates.
def load_location_from_data(self, location_data: Dict) -> bool:
    """Navigate the browser to a previously recorded location.

    Args:
        location_data: Record with ``perm_link`` or ``url`` (used when it
            contains "/p/"), otherwise ``lat`` and ``lng``.

    Returns:
        True after navigating (and waiting three seconds for the scene),
        False when no usable identifier exists or navigation raised.
    """
    try:
        assert self.driver is not None

        # Prefer perm_link or url.
        url_to_load = location_data.get("perm_link") or location_data.get("url")

        if url_to_load and "/p/" in url_to_load:
            print(f"✅ Loading location via perm_link: {url_to_load}")
            self.driver.get(url_to_load)
            time.sleep(3)  # wait for the scene to load
            return True

        # Fallback: build the link by hand from coordinates and a point of view.
        lat = location_data.get("lat")
        lng = location_data.get("lng")
        # Compare against None rather than truthiness: 0.0 is a valid
        # latitude (equator) and longitude (prime meridian).
        if lat is not None and lng is not None:
            # Default point of view; the collector does not store one per sample here.
            pov = "232.46_-5_0"
            url_slug = f"{lat}_{lng}_{pov}"
            url_to_load = f"{MAPCRUNCH_URL}/p/{url_slug}"
            print(f"✅ Loading location by constructing URL: {url_to_load}")
            self.driver.get(url_to_load)
            time.sleep(3)
            return True

        print(
            "⚠️ No valid location identifier (perm_link, url, or coords) found in data."
        )
        return False

    except Exception as e:
        print(f"❌ Error loading location: {e}")
        return False
263
+
264
def close(self):
    """Quit the browser if a driver was created."""
    driver = self.driver
    if not driver:
        return
    driver.quit()
267
+
268
def __enter__(self):
    """Enter a ``with`` block; the controller itself is the managed object."""
    return self
270
+
271
def __exit__(self, exc_type, exc_val, exc_tb):
    """Leave a ``with`` block by calling ``close``; exceptions are not suppressed."""
    self.close()
pyproject.toml ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "simple-g-ai-bot"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ dependencies = [
8
+ "aiohappyeyeballs==2.4.3",
9
+ "aiohttp==3.10.10",
10
+ "aiosignal==1.3.1",
11
+ "annotated-types==0.7.0",
12
+ "anthropic==0.36.2",
13
+ "anyio==4.6.2.post1",
14
+ "asttokens==2.4.1",
15
+ "attrs==24.2.0",
16
+ "cachetools==5.5.0",
17
+ "certifi==2024.8.30",
18
+ "charset-normalizer==3.4.0",
19
+ "colorama==0.4.6",
20
+ "comm==0.2.2",
21
+ "contourpy==1.3.0",
22
+ "cycler==0.12.1",
23
+ "debugpy==1.8.7",
24
+ "decorator==5.1.1",
25
+ "defusedxml==0.7.1",
26
+ "distro==1.9.0",
27
+ "executing==2.1.0",
28
+ "filelock==3.16.1",
29
+ "fonttools==4.54.1",
30
+ "frozenlist==1.4.1",
31
+ "fsspec==2024.9.0",
32
+ "google-ai-generativelanguage==0.6.10",
33
+ "google-api-core==2.21.0",
34
+ "google-api-python-client==2.149.0",
35
+ "google-auth==2.35.0",
36
+ "google-auth-httplib2==0.2.0",
37
+ "google-generativeai==0.8.3",
38
+ "googleapis-common-protos==1.65.0",
39
+ "greenlet==3.1.1",
40
+ "grpcio==1.67.0",
41
+ "grpcio-status==1.67.0",
42
+ "h11==0.14.0",
43
+ "httpcore==1.0.6",
44
+ "httplib2==0.22.0",
45
+ "httpx==0.27.2",
46
+ "huggingface-hub==0.26.0",
47
+ "idna==3.10",
48
+ "iprogress==0.4",
49
+ "ipykernel==6.29.5",
50
+ "ipython==8.28.0",
51
+ "jedi==0.19.1",
52
+ "jiter==0.6.1",
53
+ "jsonpatch==1.33",
54
+ "jsonpointer==3.0.0",
55
+ "jupyter-client==8.6.3",
56
+ "jupyter-core==5.7.2",
57
+ "kiwisolver==1.4.7",
58
+ "langchain==0.3.4",
59
+ "langchain-anthropic==0.2.3",
60
+ "langchain-core==0.3.12",
61
+ "langchain-google-genai==2.0.1",
62
+ "langchain-openai==0.2.3",
63
+ "langchain-text-splitters==0.3.0",
64
+ "langsmith==0.1.136",
65
+ "matplotlib==3.9.2",
66
+ "matplotlib-inline==0.1.7",
67
+ "mouseinfo==0.1.3",
68
+ "multidict==6.1.0",
69
+ "nest-asyncio==1.6.0",
70
+ "numpy==1.26.4",
71
+ "openai==1.52.0",
72
+ "opencv-python==4.10.0.84",
73
+ "orjson==3.10.9",
74
+ "packaging==24.1",
75
+ "parso==0.8.4",
76
+ "pillow==11.0.0",
77
+ "platformdirs==4.3.6",
78
+ "prompt-toolkit==3.0.48",
79
+ "propcache==0.2.0",
80
+ "proto-plus==1.24.0",
81
+ "protobuf==5.28.2",
82
+ "psutil==6.1.0",
83
+ "pure-eval==0.2.3",
84
+ "pyasn1==0.6.1",
85
+ "pyasn1-modules==0.4.1",
86
+ "pyautogui==0.9.54",
87
+ "pydantic==2.9.2",
88
+ "pydantic-core==2.23.4",
89
+ "pygetwindow==0.0.9",
90
+ "pygments==2.18.0",
91
+ "pymsgbox==1.0.9",
92
+ "pynput==1.7.7",
93
+ "pyparsing==3.2.0",
94
+ "pyperclip==1.9.0",
95
+ "pyrect==0.2.0",
96
+ "pyscreeze==1.0.1",
97
+ "python-dateutil==2.9.0.post0",
98
+ "python-dotenv==1.0.1",
99
+ "pytweening==1.2.0",
100
+ "pyyaml==6.0.2",
101
+ "pyzmq==26.2.0",
102
+ "regex==2024.9.11",
103
+ "requests==2.32.3",
104
+ "requests-toolbelt==1.0.0",
105
+ "rsa==4.9",
106
+ "selenium>=4.32.0",
107
+ "six==1.16.0",
108
+ "sniffio==1.3.1",
109
+ "sqlalchemy==2.0.36",
110
+ "stack-data==0.6.3",
111
+ "tenacity==9.0.0",
112
+ "tiktoken==0.8.0",
113
+ "tokenizers==0.20.1",
114
+ "tornado==6.4.1",
115
+ "tqdm==4.66.5",
116
+ "traitlets==5.14.3",
117
+ "typing-extensions==4.12.2",
118
+ "uritemplate==4.1.1",
119
+ "urllib3==2.2.3",
120
+ "wcwidth==0.2.13",
121
+ "yarl==1.15.5",
122
+ ]
readme.md CHANGED
@@ -1,6 +1,4 @@
1
- ## GeoGuessr AI Bot
2
 
3
- In this project you will see how to program a Python AI Bot that uses PyAutoGUI, LangChain and Vision LLMs (GPT-4o, Gemini 1.5 and Claude 3.5) to automatically play to the GeoGuessr game.
4
 
5
- [YouTube Video](https://www.youtube.com/watch?v=OyDfr0xIhss)
6
- [Medium Blog](https://medium.com/@enricdomingo/coding-a-geoguessr-autonomous-ai-bot-with-vision-llms-gpt-4o-claude-3-5-and-gemini-1-5-908faf3bc3c7)
 
1
+ python main.py --mode data --samples 50 --urban --no-indoor
2
 
 
3
 
4
+ python main.py --mode benchmark --models gpt-4o
 
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
select_regions.py DELETED
@@ -1,66 +0,0 @@
1
- import pyautogui
2
- from pynput import keyboard
3
- import yaml
4
-
5
-
6
- regions = [
7
- "screen_top_left",
8
- "screen_bot_right",
9
- ]
10
-
11
- map_regions = [
12
- "map_top_left",
13
- "map_bot_right",
14
- "confirm_button",
15
- "kodiak",
16
- "hobart",
17
- ]
18
-
19
- next_round_button = "next_round_button"
20
-
21
- coords = []
22
-
23
- PRESS_KEY = "a"
24
-
25
-
26
- def on_press(key):
27
- try:
28
- if key.char == PRESS_KEY:
29
- x, y = pyautogui.position()
30
- print(x, y)
31
- coords.append([x, y])
32
- return False
33
- except AttributeError:
34
- pass
35
-
36
-
37
- def get_coords(players=1):
38
- for region in regions:
39
- print(f"Move the mouse to the {region} region and press 'a'.")
40
- with keyboard.Listener(on_press=on_press) as keyboard_listener:
41
- keyboard_listener.join(timeout=40)
42
-
43
- for p in range(1, players+1):
44
- for region in map_regions:
45
- region = region + f"_{p}"
46
- regions.append(region)
47
- print(f"Move the mouse to the {region} region and press 'a'.")
48
- with keyboard.Listener(on_press=on_press) as keyboard_listener:
49
- keyboard_listener.join(timeout=40)
50
-
51
- regions.append(next_round_button)
52
- print(f"Move the mouse to the {next_round_button} region and press 'a'.")
53
- with keyboard.Listener(on_press=on_press) as keyboard_listener:
54
- keyboard_listener.join(timeout=40)
55
-
56
- screen_regions = {reg: coord for reg, coord in zip(regions, coords)}
57
-
58
- # save dict as a yaml file
59
- with open("screen_regions.yaml", "w") as f:
60
- yaml.dump(screen_regions, f)
61
-
62
- return screen_regions
63
-
64
-
65
- if __name__ == "__main__":
66
- _ = get_coords(players=1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
uv.lock ADDED
The diff for this file is too large to render. See raw diff
 
view_data_collect.py ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Quick viewer for collected MapCrunch data
4
+ """
5
+
6
+ import json
7
+ import os
8
+ from pathlib import Path
9
+ from PIL import Image
10
+ import matplotlib.pyplot as plt
11
+ import matplotlib.image as mpimg
12
+ from collections import Counter
13
+
14
def view_data_summary(data_file='data/golden_labels.json'):
    """Print a human-readable summary of the collected samples.

    The JSON document at ``data_file`` is read for its ``samples`` list and
    ``metadata`` dict; both default to empty when absent.  The summary covers
    the metadata, a per-country tally, coordinate/thumbnail coverage and the
    first ten sample locations.

    Args:
        data_file: Path to the golden-labels JSON file.

    Returns:
        None.  Everything is written to stdout.  When the file does not
        exist a hint is printed and the function returns early.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(data_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"❌ No data file found at {data_file}")
        print("💡 Run data collection first: python main.py --mode data --samples 50")
        return

    samples = data.get('samples', [])
    metadata = data.get('metadata', {})
    total = len(samples)

    def percent(count):
        # An empty collection must report 0.0% instead of dividing by zero.
        return count / total * 100 if total else 0.0

    print("📊 MapCrunch Data Collection Summary")
    print('=' * 50)
    print(f"📅 Collection Date: {metadata.get('collection_date', 'Unknown')}")
    print(f"📍 Total Samples: {total}")
    print(f"🏙️ Collection Options: {metadata.get('collection_options', {})}")

    # Statistics
    stats = metadata.get('statistics', {})
    if stats:
        print("\n📈 Statistics:")
        for key, value in stats.items():
            print(f" {key}: {value}")

    # Country distribution: the country is taken to be the last
    # comma-separated component of the address.
    country_counts = Counter(
        sample['address'].split(', ')[-1].strip()
        for sample in samples
        if sample.get('address') and sample['address'] != 'Unknown'
    )
    if country_counts:
        print("\n🌍 Top Countries:")
        for country, count in country_counts.most_common(10):
            print(f" {country}: {count} samples")

    # Coordinate coverage
    coords_available = sum(1 for s in samples if s.get('lat') is not None)
    print(f"\n📍 Coordinate Coverage: {coords_available}/{total} ({percent(coords_available):.1f}%)")

    # Thumbnail coverage
    thumbnails_available = sum(1 for s in samples if s.get('has_thumbnail'))
    print(f"📸 Thumbnail Coverage: {thumbnails_available}/{total} ({percent(thumbnails_available):.1f}%)")

    # Sample locations (first ten only)
    print("\n📍 Sample Locations:")
    for position, sample in enumerate(samples[:10], start=1):
        address = sample.get('address', 'Unknown')
        lat = sample.get('lat', 'N/A')
        lng = sample.get('lng', 'N/A')
        has_thumb = "📸" if sample.get('has_thumbnail') else "❌"
        print(f" {position}. {has_thumb} {address} ({lat}, {lng})")

    if total > 10:
        print(f" ... and {total - 10} more")
75
+
76
+
77
def create_thumbnail_gallery(data_file='data/golden_labels.json', output_file='data/gallery.html', max_images=100):
    """Create an HTML gallery of collected thumbnails.

    Every sample with a truthy ``thumbnail_path`` becomes one gallery item,
    up to ``max_images`` items.  Image sources are written as
    ``thumbnails/<thumbnail_path>``, i.e. relative to the generated page.

    Args:
        data_file: Path to the golden-labels JSON file.
        output_file: Path of the HTML file to write.
        max_images: Maximum number of gallery items to emit.

    Returns:
        None.  The page is written to ``output_file`` and a short report is
        printed to stdout.

    Raises:
        FileNotFoundError: If ``data_file`` does not exist.
    """
    # Imported locally: the module does not import ``html`` at the top.
    from html import escape

    with open(data_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    samples = data.get('samples', [])

    parts = ["""
<html>
<head>
<meta charset="utf-8">
<title>MapCrunch Collection Gallery</title>
<style>
body { font-family: Arial, sans-serif; background: #f0f0f0; }
h1 { text-align: center; }
.gallery { display: flex; flex-wrap: wrap; justify-content: center; }
.item {
    margin: 10px;
    background: white;
    padding: 10px;
    border-radius: 8px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    text-align: center;
}
.item img { max-width: 320px; border-radius: 4px; }
.address { font-weight: bold; margin: 5px 0; }
.coords { font-size: 0.9em; color: #666; }
.stats { margin: 20px; text-align: center; }
</style>
</head>
<body>
<h1>MapCrunch Collection Gallery</h1>
"""]

    # Add statistics
    total = len(samples)
    with_thumb = sum(1 for s in samples if s.get('has_thumbnail'))
    with_coords = sum(1 for s in samples if s.get('lat') is not None)

    parts.append(f"""
<div class="stats">
<p>Total Samples: {total} | With Thumbnails: {with_thumb} | With Coordinates: {with_coords}</p>
</div>
<div class="gallery">
""")

    # Add thumbnails
    count = 0
    for sample in samples:
        if count >= max_images:
            break
        if not sample.get('thumbnail_path'):
            continue

        # Addresses and paths come from collected data; escape them so that
        # characters such as <, > or & cannot break the generated markup.
        thumb_path = escape(f"thumbnails/{sample['thumbnail_path']}")
        address = escape(str(sample.get('address', 'Unknown')))
        lat = escape(str(sample.get('lat', 'N/A')))
        lng = escape(str(sample.get('lng', 'N/A')))

        parts.append(f"""
<div class="item">
<img src="{thumb_path}" alt="{address}">
<div class="address">{address}</div>
<div class="coords">{lat}, {lng}</div>
</div>
""")
        count += 1

    parts.append("""
</div>
</body>
</html>
""")

    # Explicit UTF-8 so non-ASCII addresses never hit a locale encoding error.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(''.join(parts))

    print(f"✅ Gallery created: {output_file}")
    print(f"📸 Included {count} images")
    print(f"💡 Open in browser: file://{os.path.abspath(output_file)}")
156
+
157
+
158
def plot_thumbnails_grid(data_file='data/golden_labels.json', max_images=20):
    """Display a grid of thumbnails using matplotlib.

    Samples with a truthy ``thumbnail_path`` are laid out five per row, at
    most ``max_images`` of them.  Image files are looked up under
    ``data/thumbnails/`` relative to the current working directory; a sample
    whose file is missing leaves its cell blank.

    Args:
        data_file: Path to the golden-labels JSON file.
        max_images: Maximum number of thumbnails to show.

    Returns:
        None.  Prints a message and returns early when no sample has a
        thumbnail; otherwise opens the figure with ``plt.show()``.

    Raises:
        FileNotFoundError: If ``data_file`` does not exist.
    """
    with open(data_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # ``.get`` keeps a file without a "samples" key from raising KeyError.
    samples = [s for s in data.get('samples', []) if s.get('thumbnail_path')][:max_images]

    if not samples:
        print("❌ No samples with thumbnails found")
        return

    # Create grid
    cols = 5
    rows = (len(samples) + cols - 1) // cols

    # squeeze=False always yields a 2-D axes array, so a single-row grid
    # needs no special-case reshape.
    _, axes = plt.subplots(rows, cols, figsize=(15, rows * 3), squeeze=False)

    for index, ax in enumerate(axes.flat):
        # Every cell, used or not, is drawn without axis decorations.
        ax.axis('off')
        if index >= len(samples):
            continue

        sample = samples[index]
        thumb_path = f"data/thumbnails/{sample['thumbnail_path']}"
        if os.path.exists(thumb_path):
            ax.imshow(mpimg.imread(thumb_path))
            address = sample.get('address') or 'Unknown'
            # Append the ellipsis only when the address was actually cut.
            title = address if len(address) <= 30 else address[:30] + '...'
            ax.set_title(title, fontsize=8)

    plt.tight_layout()
    plt.suptitle(f'MapCrunch Collection Sample ({len(samples)} locations)', y=1.02)
    plt.show()
199
+
200
+
201
def export_coordinates_csv(data_file='data/golden_labels.json', output_file='data/coordinates.csv'):
    """Export coordinates to CSV for mapping.

    One row is written per sample whose ``lat`` and ``lng`` are both not
    ``None``.  Columns are ``id`` (first eight characters), ``address``,
    ``latitude``, ``longitude`` and ``has_thumbnail`` ("Yes"/"No").

    Args:
        data_file: Path to the golden-labels JSON file.
        output_file: Path of the CSV file to write (UTF-8).

    Returns:
        None.  The number of exported rows is printed to stdout.

    Raises:
        FileNotFoundError: If ``data_file`` does not exist.
    """
    # Imported locally: the module does not import ``csv`` at the top.
    import csv

    # Read as UTF-8 to match the encoding used for the CSV output.
    with open(data_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    samples = data.get('samples', [])

    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['id', 'address', 'latitude', 'longitude', 'has_thumbnail'])

        count = 0
        for sample in samples:
            if sample.get('lat') is None or sample.get('lng') is None:
                continue
            writer.writerow([
                # A missing or non-string id must not abort the export.
                str(sample.get('id', ''))[:8],
                sample.get('address', 'Unknown'),
                sample['lat'],
                sample['lng'],
                'Yes' if sample.get('has_thumbnail') else 'No',
            ])
            count += 1

    print(f"✅ Exported {count} coordinates to {output_file}")
228
+
229
+
230
def main():
    """Command-line entry point: print the summary, then run optional actions.

    The summary is always shown.  ``--gallery``, ``--grid`` and ``--csv``
    each enable one extra action, run in that order; ``--data`` selects the
    JSON file and ``--max-images`` caps the gallery and grid.
    """
    import argparse

    cli = argparse.ArgumentParser(description='View collected MapCrunch data')
    switches = (
        ('--gallery', 'Create HTML gallery'),
        ('--grid', 'Show thumbnail grid'),
        ('--csv', 'Export coordinates to CSV'),
    )
    for flag, description in switches:
        cli.add_argument(flag, action='store_true', help=description)
    cli.add_argument('--data', default='data/golden_labels.json', help='Data file path')
    cli.add_argument('--max-images', type=int, default=50, help='Max images for gallery/grid')

    opts = cli.parse_args()
    data_path = opts.data

    # Bail out early with a hint when there is nothing to inspect.
    if not os.path.exists(data_path):
        print(f"❌ Data file not found: {data_path}")
        print("💡 Run data collection first: python main.py --mode data --samples 50")
        return

    # Always show summary
    view_data_summary(data_path)

    # Additional actions, each preceded by a blank separator line
    actions = (
        (opts.gallery, lambda: create_thumbnail_gallery(data_path, max_images=opts.max_images)),
        (opts.grid, lambda: plot_thumbnails_grid(data_path, max_images=opts.max_images)),
        (opts.csv, lambda: export_coordinates_csv(data_path)),
    )
    for enabled, run in actions:
        if enabled:
            print()
            run()


if __name__ == "__main__":
    main()