Spaces:

Chrisyichuan
/

Omniscient

Building

App Files Files Community

Andy Lee committed on Jun 7

Commit

f83d6df

1 Parent(s): 5558ad1

Merge pull request #1 from yichuan520030910320/mapcrunch

Browse files

Files changed (16) hide show

.DS_Store +0 -0
.gitignore +4 -1
.python-version +1 -0
benchmark.py +227 -0
config.py +92 -0
data_collector.py +498 -0
geo_bot.py +208 -0
geoguessr_bot.py +0 -200
main.py +255 -0
mapcrunch_controller.py +272 -0
pyproject.toml +122 -0
readme.md +2 -4
requirements.txt +0 -0
select_regions.py +0 -66
uv.lock +0 -0
view_data_collect.py +265 -0

.DS_Store DELETED Viewed

Binary file (6.15 kB)

.gitignore CHANGED Viewed

@@ -1,3 +1,6 @@
 venv/
 .env
-__pycache__

 venv/
 .env
+__pycache__
+.DS_Store
+data/
+results/

.python-version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 3.11

benchmark.py ADDED Viewed

	@@ -0,0 +1,227 @@

+# benchmark.py (Final Version)
+import os
+import json
+import time
+from datetime import datetime
+from typing import List, Dict, Optional, Tuple
+from pathlib import Path
+import math
+from geo_bot import GeoBot
+from config import DATA_PATHS, MODELS_CONFIG, SUCCESS_THRESHOLD_KM
+class MapGuesserBenchmark:
+    def __init__(self, headless: bool = False):
+        """Store the browser mode and load the golden-label samples.
+
+        Args:
+            headless: Passed to ``GeoBot`` as its ``headless`` argument when the
+                benchmark runs.
+        """
+        self.headless = headless
+        self.golden_labels = self.load_golden_labels()
+        print(f"📊 Loaded {len(self.golden_labels)} golden label samples")
+    def load_golden_labels(self) -> List[Dict]:
+        try:
+            with open(DATA_PATHS["golden_labels"], "r") as f:
+                return json.load(f).get("samples", [])
+        except Exception:
+            return []
+    def get_model_class(self, model_name: str):
+        """Resolve a configured model key to its LangChain chat class.
+
+        Args:
+            model_name: A key of ``MODELS_CONFIG``.
+
+        Returns:
+            A ``(chat_class, provider_model_name)`` tuple, where the second item
+            is the ``"model_name"`` entry of the matching configuration.
+
+        Raises:
+            ValueError: If ``model_name`` is not configured, or its ``"class"``
+                entry is not one of the three class names handled below.
+        """
+        config = MODELS_CONFIG.get(model_name)
+        if not config:
+            raise ValueError(f"Unknown model: {model_name}")
+        class_name, model_class_name = config["class"], config["model_name"]
+        # Each provider package is imported only in the branch that needs it.
+        if class_name == "ChatOpenAI":
+            from langchain_openai import ChatOpenAI
+            return ChatOpenAI, model_class_name
+        if class_name == "ChatAnthropic":
+            from langchain_anthropic import ChatAnthropic
+            return ChatAnthropic, model_class_name
+        if class_name == "ChatGoogleGenerativeAI":
+            from langchain_google_genai import ChatGoogleGenerativeAI
+            return ChatGoogleGenerativeAI, model_class_name
+        raise ValueError(f"Unknown model class: {class_name}")
+    def calculate_distance(
+        self, true_coords: Dict, predicted_coords: Optional[Tuple[float, float]]
+    ) -> Optional[float]:
+        """Calculates distance between true (lat,lon) and predicted (lat,lon)."""
+        if not predicted_coords:
+            return None
+        try:
+            true_lat, true_lng = true_coords["lat"], true_coords["lng"]
+            pred_lat, pred_lng = predicted_coords
+            R = 6371
+            lat1, lon1, lat2, lon2 = map(
+                math.radians, [true_lat, true_lng, pred_lat, pred_lng]
+            )
+            dlat = lat2 - lat1
+            dlon = lon2 - lon1
+            a = (
+                math.sin(dlat / 2) ** 2
+                + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
+            )
+            c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
+            return R * c
+        except (TypeError, KeyError, IndexError) as e:
+            print(f"Error in distance calculation: {e}")
+            return None
+    def run_benchmark(
+        self,
+        models: Optional[List[str]] = None,
+        max_samples: Optional[int] = None,
+        **kwargs,
+    ) -> Dict:
+        if not self.golden_labels:
+            raise ValueError("No golden labels available.")
+        models_to_test = models or list(MODELS_CONFIG.keys())
+        test_samples = self.golden_labels[:max_samples]
+        print(f"🚀 Starting LIVE benchmark:")
+        print(f"   Models: {models_to_test}")
+        print(f"   Samples: {len(test_samples)}")
+        all_results = []
+        for model_name in models_to_test:
+            print(f"\n🤖 Testing model: {model_name}")
+            model_class, model_class_name = self.get_model_class(model_name)
+            try:
+                with GeoBot(
+                    model=model_class,
+                    model_name=model_class_name,
+                    use_selenium=True,
+                    headless=self.headless,
+                ) as bot:
+                    for i, sample in enumerate(test_samples):
+                        print(f"   📍 Sample {i + 1}/{len(test_samples)}")
+                        try:
+                            result = self.run_single_test_with_bot(bot, sample)
+                            all_results.append(result)
+                            status = (
+                                "✅ Success" if result.get("success") else "❌ Failed"
+                            )
+                            distance = result.get("distance_km")
+                            dist_str = (
+                                f"{distance:.1f} km" if distance is not None else "N/A"
+                            )
+                            print(f"   {status} (Distance: {dist_str})")
+                        except KeyboardInterrupt:
+                            print("\n⏹️  Benchmark inner loop interrupted.")
+                            raise
+                        except Exception as e:
+                            print(f"   ❌ Test failed with unhandled exception: {e}")
+                            all_results.append(
+                                {
+                                    "model": model_name,
+                                    "sample_id": sample["id"],
+                                    "success": False,
+                                    "error": str(e),
+                                }
+                            )
+            except KeyboardInterrupt:
+                print("\n⏹️  Benchmark outer loop interrupted.")
+                break
+        self.save_results(all_results)
+        return self.generate_summary(all_results)
+    def run_single_test_with_bot(self, bot: GeoBot, location_data: Dict) -> Dict:
+        """Load one golden-label location, query the model and score the guess.
+
+        Args:
+            bot: An open ``GeoBot`` whose ``controller`` is set.
+            location_data: A golden-label sample; ``"id"`` and ``"coordinates"``
+                are read here.
+
+        Returns:
+            A result record. On success of the pipeline it holds the true and
+            predicted coordinates, ``distance_km``, ``inference_time`` and
+            ``success``; if loading or the screenshot fails it holds
+            ``success=False`` and an ``error`` message instead.
+        """
+        start_time = time.time()
+        assert bot.controller is not None
+        if not bot.controller.load_location_from_data(location_data):
+            return {
+                "success": False,
+                "error": "Failed to load location",
+                "model": bot.model_name,
+                "sample_id": location_data["id"],
+            }
+        screenshot = bot.take_screenshot()
+        if not screenshot:
+            return {
+                "success": False,
+                "error": "Failed to take screenshot",
+                "model": bot.model_name,
+                "sample_id": location_data["id"],
+            }
+        predicted_lat_lon = bot.analyze_image(screenshot)
+        # Measured from before the location load, so this also includes the
+        # page-load and screenshot time, not only the model call.
+        inference_time = time.time() - start_time
+        true_coords = location_data["coordinates"]
+        distance_km = self.calculate_distance(true_coords, predicted_lat_lon)
+        # A missing distance (no usable prediction) counts as a failure.
+        is_success = distance_km is not None and distance_km <= SUCCESS_THRESHOLD_KM
+        return {
+            "sample_id": location_data["id"],
+            "model": bot.model_name,
+            "true_coordinates": true_coords,
+            "predicted_coordinates": predicted_lat_lon,
+            "distance_km": distance_km,
+            "inference_time": inference_time,
+            "success": is_success,
+        }
+    def save_results(self, results: List[Dict]):
+        """Write the result records to a timestamped JSON file.
+
+        Does nothing for an empty list. The file is created inside
+        ``DATA_PATHS["results"]`` (created on demand). Any error is printed
+        rather than raised.
+
+        Args:
+            results: Result records as produced by ``run_benchmark``.
+        """
+        if not results:
+            return
+        try:
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            results_dir = Path(DATA_PATHS["results"])
+            results_dir.mkdir(parents=True, exist_ok=True)
+            results_file = results_dir / f"benchmark_results_{timestamp}.json"
+            output_data = {
+                "metadata": {"timestamp": datetime.now().isoformat()},
+                "results": results,
+            }
+            with open(results_file, "w") as f:
+                # default=str stringifies any value json cannot encode natively.
+                json.dump(output_data, f, indent=2, default=str)
+            print(f"💾 Results saved to {results_file}")
+        except Exception as e:
+            print(f"❌ Error saving results: {e}")
+    def generate_summary(self, results: List[Dict]) -> Dict:
+        summary = {}
+        by_model = {}
+        for r in results:
+            model = r.get("model", "unknown")
+            if model not in by_model:
+                by_model[model] = []
+            by_model[model].append(r)
+        for model, model_results in by_model.items():
+            successful_runs = [r for r in model_results if r.get("success")]
+            distances = [
+                r["distance_km"]
+                for r in model_results
+                if r.get("distance_km") is not None
+            ]
+            if not model_results:
+                continue
+            summary[model] = {
+                "success_rate": len(successful_runs) / len(model_results)
+                if model_results
+                else 0,
+                "average_distance_km": sum(distances) / len(distances)
+                if distances
+                else None,
+                "median_distance_km": sorted(distances)[len(distances) // 2]
+                if distances
+                else None,
+                "min_distance_km": min(distances) if distances else None,
+                "max_distance_km": max(distances) if distances else None,
+            }
+        return summary

config.py ADDED Viewed

	@@ -0,0 +1,92 @@

+# Configuration file for MapCrunch benchmark
+# A prediction counts as a success in benchmark.py when its distance to the
+# true location is at most this many kilometres.
+SUCCESS_THRESHOLD_KM = 100
+# MapCrunch settings
+MAPCRUNCH_URL = "https://www.mapcrunch.com"
+# UI element selectors (presumably CSS selectors for the MapCrunch page —
+# confirm against mapcrunch_controller.py)
+SELECTORS = {
+    "go_button": "#go-button",
+    "options_button": "#options-button",
+    "stealth_checkbox": "#stealth",
+    "urban_checkbox": "#cities",
+    "indoor_checkbox": "#inside",
+    "tour_checkbox": "#tour",
+    "auto_checkbox": "#auto",
+    "pano_container": "#pano",
+    "map_container": "#map",
+    "address_element": "#address",
+    "confirm_button": "#confirm-button",  # Will be determined dynamically
+    "country_list": "#countrylist",
+    "continent_links": "#continents a",
+}
+# MapCrunch collection options (default options for DataCollector)
+MAPCRUNCH_OPTIONS = {
+    "urban_only": True,  # Show urban areas only
+    "exclude_indoor": True,  # Exclude indoor views
+    "stealth_mode": True,  # Hide location info during gameplay
+    "tour_mode": False,  # 360 degree tour
+    "auto_mode": False,  # Automatic slideshow
+    "selected_countries": None,  # None means all, or list like ['us', 'gb', 'jp']
+    "selected_continents": None,  # None means all, or list like [1, 2] (1=N.America, 2=Europe, etc)
+}
+# Data collection settings
+DATA_COLLECTION_CONFIG = {
+    "save_thumbnails": True,  # Save small screenshots
+    "thumbnail_size": (320, 240),  # Thumbnail dimensions
+    "save_full_screenshots": False,  # Save full resolution screenshots (storage intensive)
+    "extract_address": True,  # Extract address/location name
+    "wait_after_go": 3,  # Seconds to wait after clicking Go
+    "retry_on_failure": True,  # Retry if location fails
+    "max_retries": 3,  # Max retries per location
+}
+# Reference points for coordinate calibration (used in pyautogui coordinate system)
+REFERENCE_POINTS = {
+    "kodiak": {"lat": 57.7916, "lon": -152.4083},
+    "hobart": {"lat": -42.8833, "lon": 147.3355},
+}
+# Selenium settings
+SELENIUM_CONFIG = {
+    "headless": False,
+    "window_size": (1920, 1080),
+    "implicit_wait": 10,
+    "page_load_timeout": 30,
+}
+# Model configurations
+# "class" is matched by name in MapGuesserBenchmark.get_model_class;
+# "model_name" is the identifier handed to that chat class.
+MODELS_CONFIG = {
+    "gpt-4o": {
+        "class": "ChatOpenAI",
+        "model_name": "gpt-4o",
+    },
+    "claude-3.5-sonnet": {
+        "class": "ChatAnthropic",
+        "model_name": "claude-3-5-sonnet-20241022",
+    },
+    "gemini-1.5-pro": {
+        "class": "ChatGoogleGenerativeAI",
+        "model_name": "gemini-1.5-pro",
+    },
+}
+# Benchmark settings
+# "data_collection_samples" is the default sample count of
+# DataCollector.collect_samples.
+BENCHMARK_CONFIG = {
+    "rounds_per_model": 50,
+    "data_collection_samples": 200,
+    "screenshot_delay": 2,
+    "click_delay": 1,
+}
+# Data paths
+# Entries ending in "/" are created as directories by
+# DataCollector.setup_directories; for the others only the parent is created.
+DATA_PATHS = {
+    "golden_labels": "data/golden_labels.json",
+    "screenshots": "data/screenshots/",
+    "thumbnails": "data/thumbnails/",
+    "results": "results/",
+    "screen_regions": "screen_regions.yaml",  # Keep for backward compatibility
+}

data_collector.py ADDED Viewed

	@@ -0,0 +1,498 @@

+import os
+import json
+import time
+from datetime import datetime
+from typing import List, Dict, Optional
+from pathlib import Path
+import uuid
+from PIL import Image
+from io import BytesIO
+from mapcrunch_controller import MapCrunchController
+from config import (
+    DATA_PATHS,
+    BENCHMARK_CONFIG,
+    DATA_COLLECTION_CONFIG,
+    MAPCRUNCH_OPTIONS,
+)
+class DataCollector:
+    """Collect MapCrunch location identifiers, coordinates, and thumbnails"""
+    def __init__(self, headless: bool = False, options: Optional[Dict] = None):
+        self.controller = MapCrunchController(headless=headless)
+        self.data = []
+        self.options = options or MAPCRUNCH_OPTIONS
+        self.setup_directories()
+    def setup_directories(self):
+        """Create necessary directories for data storage"""
+        for path in DATA_PATHS.values():
+            # A trailing "/" marks the entry itself as a directory; otherwise
+            # the entry is a file and only its parent directory is created.
+            if path.endswith("/"):
+                Path(path).mkdir(parents=True, exist_ok=True)
+            else:
+                Path(path).parent.mkdir(parents=True, exist_ok=True)
+    def collect_samples(
+        self, num_samples: Optional[int] = None, filter_indoor: Optional[bool] = None
+    ) -> List[Dict]:
+        """Collect specified number of MapCrunch locations with coordinates and thumbnails"""
+        if num_samples is None:
+            num_samples = BENCHMARK_CONFIG["data_collection_samples"]
+        # Override indoor filter if specified
+        if filter_indoor is not None:
+            self.options["exclude_indoor"] = filter_indoor
+        print(f"🚀 Starting location data collection for {num_samples} samples...")
+        print(
+            f"📍 Options: Urban={self.options.get('urban_only', False)}, Exclude Indoor={self.options.get('exclude_indoor', True)}"
+        )
+        # Setup MapCrunch options
+        if not self.controller.setup_collection_options(self.options):
+            print("⚠️  Could not configure all options, continuing anyway...")
+        # Setup clean environment for stealth mode if needed
+        if self.options.get("stealth_mode", True):
+            self.controller.setup_clean_environment()
+        successful_samples = 0
+        failed_samples = 0
+        consecutive_failures = 0
+        while successful_samples < num_samples:
+            try:
+                print(
+                    f"\n📍 Collecting location {successful_samples + 1}/{num_samples}"
+                )
+                # Get new random location
+                if not self.controller.click_go_button():
+                    print("❌ Failed to get new location")
+                    failed_samples += 1
+                    consecutive_failures += 1
+                    if consecutive_failures > 5:
+                        print("❌ Too many consecutive failures, stopping")
+                        break
+                    continue
+                # Wait for page to load
+                time.sleep(DATA_COLLECTION_CONFIG.get("wait_after_go", 5))
+                # Collect location data with retries
+                location_data = None
+                retries = (
+                    DATA_COLLECTION_CONFIG.get("max_retries", 3)
+                    if DATA_COLLECTION_CONFIG.get("retry_on_failure", True)
+                    else 1
+                )
+                for retry in range(retries):
+                    location_data = self.collect_single_location()
+                    if location_data:
+                        break
+                    if retry < retries - 1:
+                        print(f"   ⚠️  Retry {retry + 1}/{retries - 1}")
+                        time.sleep(1)
+                if location_data:
+                    self.data.append(location_data)
+                    successful_samples += 1
+                    consecutive_failures = 0
+                    # Display collected info
+                    address = location_data.get("address", "Unknown")
+                    lat, lng = location_data.get("lat"), location_data.get("lng")
+                    if lat and lng:
+                        print(
+                            f"✅ Location {successful_samples}: {address} ({lat:.4f}, {lng:.4f})"
+                        )
+                    else:
+                        print(f"✅ Location {successful_samples}: {address}")
+                    if location_data.get("thumbnail_path"):
+                        print(
+                            f"   📸 Thumbnail saved: {location_data['thumbnail_path']}"
+                        )
+                else:
+                    failed_samples += 1
+                    consecutive_failures += 1
+                    print("❌ Location collection failed")
+                # Brief pause between samples
+                time.sleep(0.5)
+            except KeyboardInterrupt:
+                print(
+                    f"\n⏹️  Collection stopped by user after {successful_samples} samples"
+                )
+                break
+            except Exception as e:
+                print(f"❌ Error collecting location: {e}")
+                failed_samples += 1
+                consecutive_failures += 1
+                continue
+        print("\n📊 Collection Summary:")
+        print(f"✅ Successful: {successful_samples}")
+        print(f"❌ Failed: {failed_samples}")
+        print(
+            f"📈 Success rate: {successful_samples / (successful_samples + failed_samples) * 100:.1f}%"
+        )
+        # Save collected data
+        self.save_data()
+        return self.data
+    def collect_single_location(self) -> Optional[Dict]:
+        """Collect a single location with all metadata"""
+        try:
+            sample_id = str(uuid.uuid4())
+            timestamp = datetime.now().isoformat()
+            assert self.controller.driver is not None
+            # 1. 获取实时坐标 (这个方法依然正确)
+            current_coords = self.controller.driver.execute_script(
+                "if (window.panorama) { return { lat: window.panorama.getPosition().lat(), lng: window.panorama.getPosition().lng() }; } else { return null; }"
+            )
+            if not current_coords or current_coords.get("lat") is None:
+                return None
+            # **2. 新增: 获取实时的链接和Pano ID**
+            live_identifiers = self.controller.get_live_location_identifiers()
+            if not live_identifiers or "error" in live_identifiers:
+                print(
+                    f"⚠️ Could not get live identifiers: {live_identifiers.get('error')}"
+                )
+                return None
+            # 3. 获取地址
+            address = self.controller.get_current_address()
+            # 4. 创建数据记录
+            location_data = {
+                "id": sample_id,
+                "timestamp": timestamp,
+                "coordinates": current_coords,
+                "lat": current_coords.get("lat"),
+                "lng": current_coords.get("lng"),
+                "address": address or "Unknown",
+                "source": "panorama_object",
+                # **使用新的实时标识符**
+                "url": live_identifiers.get("permLink"),
+                "perm_link": live_identifiers.get("permLink"),
+                "pano_id": live_identifiers.get("panoId"),
+                "url_slug": live_identifiers.get("urlString"),  # 新增，更可靠
+                "collection_options": self.options.copy(),
+            }
+            # ... (后续保存缩略图的代码不变) ...
+            if DATA_COLLECTION_CONFIG.get("save_thumbnails", True):
+                thumbnail_path = self.save_thumbnail(sample_id)
+                location_data["thumbnail_path"] = thumbnail_path
+                location_data["has_thumbnail"] = bool(thumbnail_path)
+            # Save full screenshot if configured (storage intensive)
+            if DATA_COLLECTION_CONFIG.get("save_full_screenshots", False):
+                screenshot_path = self.save_full_screenshot(sample_id)
+                if screenshot_path:
+                    location_data["screenshot_path"] = screenshot_path
+            return location_data
+        except Exception as e:
+            print(f"❌ Error in collect_single_location: {e}")
+            return None
+    def save_thumbnail(self, sample_id: str) -> Optional[str]:
+        """Save a JPEG thumbnail of the current Street View.
+
+        Args:
+            sample_id: Used as the file name stem (``<sample_id>.jpg``).
+
+        Returns:
+            The thumbnail's file name (not its full path) inside
+            ``DATA_PATHS["thumbnails"]``, or ``None`` if no screenshot could
+            be taken or saving failed.
+        """
+        try:
+            # Take screenshot
+            screenshot_bytes = self.controller.take_street_view_screenshot()
+            if not screenshot_bytes:
+                return None
+            # Convert to PIL Image
+            image = Image.open(BytesIO(screenshot_bytes))
+            # Resize to thumbnail size
+            thumbnail_size = DATA_COLLECTION_CONFIG.get("thumbnail_size", (320, 240))
+            image.thumbnail(thumbnail_size, Image.Resampling.LANCZOS)
+            # Save thumbnail
+            thumbnail_filename = f"{sample_id}.jpg"
+            thumbnail_path = os.path.join(DATA_PATHS["thumbnails"], thumbnail_filename)
+            # Convert to RGB if necessary (remove alpha channel)
+            # NOTE(review): only RGBA uses its alpha channel as the paste mask;
+            # LA is pasted without one, and other modes are saved as they are.
+            if image.mode in ("RGBA", "LA"):
+                rgb_image = Image.new("RGB", image.size, (255, 255, 255))
+                rgb_image.paste(
+                    image, mask=image.split()[-1] if image.mode == "RGBA" else None
+                )
+                image = rgb_image
+            image.save(thumbnail_path, "JPEG", quality=85, optimize=True)
+            return thumbnail_filename
+        except Exception as e:
+            print(f"⚠️  Error saving thumbnail: {e}")
+            return None
+    def save_full_screenshot(self, sample_id: str) -> Optional[str]:
+        """Save full resolution screenshot (optional, storage intensive).
+
+        Args:
+            sample_id: Used as the file name stem (``<sample_id>.png``).
+
+        Returns:
+            The screenshot's file name (not its full path) inside
+            ``DATA_PATHS["screenshots"]``, or ``None`` if no screenshot could
+            be taken or writing failed.
+        """
+        try:
+            screenshot_bytes = self.controller.take_street_view_screenshot()
+            if not screenshot_bytes:
+                return None
+            screenshot_filename = f"{sample_id}.png"
+            screenshot_path = os.path.join(
+                DATA_PATHS["screenshots"], screenshot_filename
+            )
+            # The bytes are written unchanged, without re-encoding.
+            with open(screenshot_path, "wb") as f:
+                f.write(screenshot_bytes)
+            return screenshot_filename
+        except Exception as e:
+            print(f"⚠️  Error saving screenshot: {e}")
+            return None
+    def save_data(self):
+        """Save collected location data to JSON file.
+
+        Writes ``self.data`` plus a metadata block (collection date, counts and
+        the collection options) to ``DATA_PATHS["golden_labels"]``, replacing
+        the file's previous content. Errors are printed rather than raised.
+        """
+        try:
+            # Calculate statistics
+            stats = {
+                "total_samples": len(self.data),
+                "with_coordinates": sum(
+                    1 for d in self.data if d.get("lat") is not None
+                ),
+                "with_address": sum(
+                    1
+                    for d in self.data
+                    if d.get("address") and d["address"] != "Unknown"
+                ),
+                "with_thumbnails": sum(
+                    1 for d in self.data if d.get("has_thumbnail", False)
+                ),
+                # Heuristic: the text after the last ", " of the address is
+                # counted as the country, so "Unknown" counts as one too.
+                "unique_countries": len(
+                    set(
+                        d.get("address", "").split(", ")[-1]
+                        for d in self.data
+                        if d.get("address")
+                    )
+                ),
+            }
+            output_data = {
+                "metadata": {
+                    "collection_date": datetime.now().isoformat(),
+                    "total_samples": len(self.data),
+                    "statistics": stats,
+                    "collection_options": self.options,
+                    "version": "3.0",
+                    "description": "MapCrunch location data with thumbnails and metadata",
+                },
+                "samples": self.data,
+            }
+            with open(DATA_PATHS["golden_labels"], "w") as f:
+                json.dump(output_data, f, indent=2)
+            print(f"\n💾 Location data saved to {DATA_PATHS['golden_labels']}")
+            print("📊 Statistics:")
+            for key, value in stats.items():
+                print(f"   {key}: {value}")
+        except Exception as e:
+            print(f"❌ Error saving data: {e}")
+    def load_existing_data(self) -> List[Dict]:
+        """Load existing location data.
+
+        Returns:
+            The ``"samples"`` list from ``DATA_PATHS["golden_labels"]``, or an
+            empty list if the file does not exist, has no such key, or cannot
+            be read (the error is printed).
+        """
+        try:
+            if os.path.exists(DATA_PATHS["golden_labels"]):
+                with open(DATA_PATHS["golden_labels"], "r") as f:
+                    data = json.load(f)
+                return data.get("samples", [])
+            else:
+                return []
+        except Exception as e:
+            print(f"❌ Error loading existing data: {e}")
+            return []
+    def validate_sample(self, sample: Dict) -> bool:
+        """Validate that a sample has required fields"""
+        required_fields = ["id", "coordinates"]
+        # Check required fields
+        if not all(field in sample for field in required_fields):
+            return False
+        # Check if coordinates are valid
+        coords = sample["coordinates"]
+        if coords.get("lat") is None or coords.get("lng") is None:
+            if coords.get("address") is None:
+                return False
+        return True
+    def clean_invalid_samples(self):
+        """Remove invalid samples from dataset.
+
+        Reloads the stored samples, keeps those accepted by
+        ``validate_sample`` and rewrites the file only if something was
+        dropped.
+        """
+        existing_data = self.load_existing_data()
+        valid_samples = [
+            sample for sample in existing_data if self.validate_sample(sample)
+        ]
+        print(
+            f"🧹 Cleaned dataset: {len(existing_data)} -> {len(valid_samples)} samples"
+        )
+        if len(valid_samples) != len(existing_data):
+            # Save cleaned data
+            # save_data regenerates the metadata block, so the stored
+            # collection_options become this collector's options.
+            self.data = valid_samples
+            self.save_data()
+    def filter_samples(self, filter_func=None, country=None, has_coordinates=None):
+        """Filter existing samples based on criteria.
+
+        The samples are read from disk via ``load_existing_data``; the
+        criteria that are given are applied one after another.
+
+        Args:
+            filter_func: Optional predicate called with each sample; samples
+                for which it returns a falsy value are dropped.
+            country: Optional text; kept samples contain it (case-insensitive)
+                anywhere in their ``"address"``.
+            has_coordinates: ``True`` keeps samples with both ``"lat"`` and
+                ``"lng"`` set, ``False`` keeps samples missing either one,
+                ``None`` skips this criterion.
+
+        Returns:
+            The list of samples that passed all given criteria.
+        """
+        samples = self.load_existing_data()
+        filtered = samples
+        # Filter by country
+        if country:
+            filtered = [
+                s for s in filtered if country.lower() in s.get("address", "").lower()
+            ]
+        # Filter by coordinate availability
+        if has_coordinates is not None:
+            if has_coordinates:
+                filtered = [
+                    s
+                    for s in filtered
+                    if s.get("lat") is not None and s.get("lng") is not None
+                ]
+            else:
+                filtered = [
+                    s for s in filtered if s.get("lat") is None or s.get("lng") is None
+                ]
+        # Apply custom filter
+        if filter_func:
+            filtered = [s for s in filtered if filter_func(s)]
+        print(f"🔍 Filtered: {len(samples)} -> {len(filtered)} samples")
+        return filtered
+    def export_summary(self, output_file: str = "data_summary.txt"):
+        """Export a human-readable summary of collected data"""
+        samples = self.load_existing_data()
+        with open(output_file, "w") as f:
+            f.write("MapCrunch Data Collection Summary\n")
+            f.write("=" * 50 + "\n\n")
+            for i, sample in enumerate(samples):
+                f.write(f"Sample {i + 1}:\n")
+                f.write(f"  ID: {sample['id'][:8]}...\n")
+                f.write(f"  Address: {sample.get('address', 'Unknown')}\n")
+                f.write(
+                    f"  Coordinates: {sample.get('lat', 'N/A')}, {sample.get('lng', 'N/A')}\n"
+                )
+                f.write(
+                    f"  Thumbnail: {'Yes' if sample.get('has_thumbnail') else 'No'}\n"
+                )
+                f.write(f"  Collected: {sample.get('timestamp', 'Unknown')}\n")
+                f.write("-" * 30 + "\n")
+        print(f"📄 Summary exported to {output_file}")
+    def close(self):
+        """Clean up resources by closing the MapCrunch controller."""
+        self.controller.close()
+    def __enter__(self):
+        """Return the collector itself for use in a ``with`` statement."""
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Close the collector on leaving the ``with`` block.
+
+        Returns ``None``, so an exception raised inside the block propagates.
+        """
+        self.close()
+def main():
+    """Main function for data collection.
+
+    Parses the command line and runs exactly one of three modes: ``--clean``
+    (drop invalid stored samples), ``--export-summary`` (write a text
+    summary), or, by default, collection of new samples.
+    """
+    import argparse
+    parser = argparse.ArgumentParser(
+        description="Collect MapCrunch location data for benchmark"
+    )
+    parser.add_argument(
+        "--samples", type=int, default=50, help="Number of locations to collect"
+    )
+    parser.add_argument(
+        "--headless", action="store_true", help="Run browser in headless mode"
+    )
+    parser.add_argument(
+        "--clean", action="store_true", help="Clean invalid samples from existing data"
+    )
+    parser.add_argument(
+        "--urban", action="store_true", help="Collect only urban locations"
+    )
+    parser.add_argument("--no-indoor", action="store_true", help="Exclude indoor views")
+    parser.add_argument(
+        "--countries",
+        nargs="+",
+        help="Specific countries to collect from (e.g., us gb jp)",
+    )
+    parser.add_argument(
+        "--export-summary", action="store_true", help="Export summary of collected data"
+    )
+    parser.add_argument(
+        "--filter-country", help="Filter samples by country when exporting"
+    )
+    args = parser.parse_args()
+    # NOTE(review): the two maintenance modes below still construct a
+    # DataCollector, whose __init__ creates a MapCrunchController, although
+    # they only read and write the JSON file.
+    if args.clean:
+        print("🧹 Cleaning existing dataset...")
+        with DataCollector(headless=True) as collector:
+            collector.clean_invalid_samples()
+        return
+    if args.export_summary:
+        print("📄 Exporting data summary...")
+        with DataCollector(headless=True) as collector:
+            if args.filter_country:
+                samples = collector.filter_samples(country=args.filter_country)
+                collector.data = samples
+                collector.export_summary(f"data_summary_{args.filter_country}.txt")
+            else:
+                collector.export_summary()
+        return
+    # Configure collection options
+    options = MAPCRUNCH_OPTIONS.copy()
+    # These flags can only switch an option on; an option that is already
+    # True in MAPCRUNCH_OPTIONS cannot be switched off from the command line.
+    if args.urban:
+        options["urban_only"] = True
+    if args.no_indoor:
+        options["exclude_indoor"] = True
+    if args.countries:
+        options["selected_countries"] = args.countries
+    # Collect new location data
+    with DataCollector(headless=args.headless, options=options) as collector:
+        data = collector.collect_samples(args.samples)
+        print(f"\n🎉 Collection complete! Collected {len(data)} location samples.")
+        print("📊 Ready for benchmark testing with these locations.")
+if __name__ == "__main__":
+    main()

geo_bot.py ADDED Viewed

	@@ -0,0 +1,208 @@

+# geo_bot.py (Final Version)
+from io import BytesIO
+import os
+import dotenv
+import base64
+import re  # 导入 re 模块
+from typing import Tuple, List, Optional
+from PIL import Image
+from langchain_core.messages import HumanMessage, BaseMessage
+from langchain_openai import ChatOpenAI
+from langchain_anthropic import ChatAnthropic
+from langchain_google_genai import ChatGoogleGenerativeAI
+from mapcrunch_controller import MapCrunchController
+from config import REFERENCE_POINTS
+dotenv.load_dotenv()
+PROMPT_INSTRUCTIONS = """
+Try to predict where the image was taken.
+First describe the relevant details in the image to do it.
+List some regions and places where it could be.
+Choose the most likely Country and City or Specific Location.
+At the end, in the last line apart from the previous reasoning, write the Latitude and Longitude from that guessed location
+using the following format, making sure that the coords are valid floats, without anything else and making sure to be consistent with the format:
+Lat: XX.XXXX, Lon: XX.XXXX
+"""
+class GeoBot:
+    prompt_instructions: str = PROMPT_INSTRUCTIONS
+    def __init__(
+        self, model=ChatOpenAI, model_name="gpt-4o", use_selenium=True, headless=False
+    ):
+        self.model = model(model=model_name)
+        self.model_name = model_name
+        self.use_selenium = use_selenium
+        self.controller = (
+            MapCrunchController(headless=headless) if use_selenium else None
+        )
+        # Get screen and map regions
+        if use_selenium:
+            self._setup_screen_regions()
+        else:
+            # Fallback to manual regions (backward compatibility)
+            self._load_manual_regions()
+        # Reference points for coordinate calibration
+        self.kodiak_lat, self.kodiak_lon = (
+            REFERENCE_POINTS["kodiak"]["lat"],
+            REFERENCE_POINTS["kodiak"]["lon"],
+        )
+        self.hobart_lat, self.hobart_lon = (
+            REFERENCE_POINTS["hobart"]["lat"],
+            REFERENCE_POINTS["hobart"]["lon"],
+        )
+    def _setup_screen_regions(self):
+        """Setup screen regions using Selenium element positions"""
+        try:
+            # Get map element info
+            map_info = self.controller.get_map_element_info()
+            # Convert browser coordinates to screen coordinates
+            self.map_x = map_info["x"]
+            self.map_y = map_info["y"]
+            self.map_w = map_info["width"]
+            self.map_h = map_info["height"]
+            # Set screen capture region (full window)
+            window_size = self.controller.driver.get_window_size()
+            self.screen_x, self.screen_y = 0, 0
+            self.screen_w = window_size["width"]
+            self.screen_h = window_size["height"]
+            # Reference points for coordinate conversion (approximate map positions)
+            # These would need to be calibrated for MapCrunch's specific map projection
+            self.kodiak_x = self.map_x + int(self.map_w * 0.1)  # Approximate
+            self.kodiak_y = self.map_y + int(self.map_h * 0.2)
+            self.hobart_x = self.map_x + int(self.map_w * 0.9)
+            self.hobart_y = self.map_y + int(self.map_h * 0.8)
+            print(
+                f"📍 Screen regions setup: Map({self.map_x},{self.map_y},{self.map_w},{self.map_h})"
+            )
+        except Exception as e:
+            print(f"⚠️  Warning: Could not setup screen regions via Selenium: {e}")
+            self._load_manual_regions()
+    def _load_manual_regions(self):
+        """Fallback to manual screen regions (backward compatibility)"""
+        import yaml
+        try:
+            with open("screen_regions.yaml") as f:
+                screen_regions = yaml.safe_load(f)
+            self.screen_x, self.screen_y = screen_regions["screen_top_left"]
+            self.screen_w = screen_regions["screen_bot_right"][0] - self.screen_x
+            self.screen_h = screen_regions["screen_bot_right"][1] - self.screen_y
+            self.map_x, self.map_y = screen_regions["map_top_left_1"]
+            self.map_w = screen_regions["map_bot_right_1"][0] - self.map_x
+            self.map_h = screen_regions["map_bot_right_1"][1] - self.map_y
+            self.kodiak_x, self.kodiak_y = screen_regions["kodiak_1"]
+            self.hobart_x, self.hobart_y = screen_regions["hobart_1"]
+        except FileNotFoundError:
+            print("❌ No screen_regions.yaml found and Selenium setup failed")
+            raise
+    @staticmethod
+    def pil_to_base64(image: Image) -> str:
+        buffered = BytesIO()
+        image.save(buffered, format="PNG")
+        return base64.b64encode(buffered.getvalue()).decode("utf-8")
+    @classmethod
+    def create_message(cls, images_data: List[str]) -> HumanMessage:
+        content = [{"type": "text", "text": cls.prompt_instructions}]
+        for img_data in images_data:
+            content.append(
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/png;base64,{img_data}"},
+                }
+            )
+        return HumanMessage(content=content)
+    def extract_lat_lon_from_response(
+        self, response: BaseMessage
+    ) -> Optional[Tuple[float, float]]:
+        """Extracts latitude and longitude from LLM response using regex for robustness."""
+        try:
+            content = response.content.strip()
+            last_line = ""
+            for line in reversed(content.split("\n")):
+                if "lat" in line.lower() and "lon" in line.lower():
+                    last_line = line
+                    break
+            if not last_line:
+                print(f"❌ No coordinate line found in response.")
+                return None
+            print(f"🎯 {self.model_name} Prediction: {last_line}")
+            numbers = re.findall(r"[-+]?\d*\.\d+|\d+", last_line)
+            if len(numbers) < 2:
+                print(
+                    f"❌ Could not find two numbers for lat/lon in line: '{last_line}'"
+                )
+                return None
+            lat, lon = float(numbers[0]), float(numbers[1])
+            if not (-90 <= lat <= 90 and -180 <= lon <= 180):
+                print(f"❌ Invalid coordinates extracted: Lat {lat}, Lon {lon}")
+                return None
+            return lat, lon
+        except Exception as e:
+            print(
+                f"❌ Error parsing lat/lon from response: {e}\nFull response was:\n{content}"
+            )
+            return None
+    def take_screenshot(self) -> Optional[Image.Image]:
+        if self.use_selenium and self.controller:
+            screenshot_bytes = self.controller.take_street_view_screenshot()
+            if screenshot_bytes:
+                return Image.open(BytesIO(screenshot_bytes))
+        return None
+    def analyze_image(self, image: Image) -> Optional[Tuple[float, float]]:
+        """Analyze image and return predicted latitude and longitude."""
+        try:
+            screenshot_b64 = self.pil_to_base64(image)
+            message = self.create_message([screenshot_b64])
+            response = self.model.invoke([message])
+            print(f"\n🤖 Full response from {self.model_name}:")
+            print(response.content)
+            # 直接返回 (lat, lon) 元组
+            return self.extract_lat_lon_from_response(response)
+        except Exception as e:
+            print(f"❌ Error in analyze_image: {e}")
+            return None
+    def close(self):
+        if self.controller:
+            self.controller.close()
+    def __enter__(self):
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()

geoguessr_bot.py DELETED Viewed

@@ -1,200 +0,0 @@
-from io import BytesIO
-import os
-import dotenv
-import base64
-import pyautogui
-import matplotlib.pyplot as plt
-import math
-from time import time, sleep
-from typing import Tuple, List
-from PIL import Image
-from langchain_core.messages import HumanMessage, BaseMessage
-from langchain_openai import ChatOpenAI
-from langchain_anthropic import ChatAnthropic
-from langchain_google_genai import ChatGoogleGenerativeAI
-dotenv.load_dotenv()
-PROMPT_INSTRUCTIONS = """
-Try to predict where the image was taken.
-First describe the relevant details in the image to do it.
-List some regions and places where it could be.
-Chose the most likely Country and City or Specific Location.
-At the end, in the last line a part from the previous reasoning, write the Latitude and Longitude from that guessed location
-using the following format, making sure that the coords are valid floats, without anything else and making sure to be consistent with the format:
-Lat: XX.XXXX, Lon: XX.XXXX
-"""
-class GeoBot:
-    """Screen-automation bot: asks a vision LLM for coordinates and clicks them on the minimap.
-    Pixel regions come from a ``screen_regions`` mapping; two reference points
-    (keys ``kodiak_<player>`` / ``hobart_<player>``) calibrate the lat/lon -> pixel conversion.
-    """
-    prompt_instructions: str = PROMPT_INSTRUCTIONS
-    def __init__(self, screen_regions, player=1, model=ChatOpenAI, model_name="gpt-4o"):
-        """Store the screen/minimap geometry for ``player`` and build the chat model.
-        Args:
-            screen_regions: Mapping with the pixel coordinates read below
-                (``screen_top_left``, ``map_top_left_<player>``, ...).
-            player: Suffix used to select the per-player keys.
-            model: Chat-model class, instantiated as ``model(model=model_name)``.
-            model_name: Model identifier passed to ``model``.
-        """
-        self.player = player
-        self.screen_regions = screen_regions
-        self.screen_x, self.screen_y = screen_regions["screen_top_left"]
-        self.screen_w = screen_regions["screen_bot_right"][0] - self.screen_x
-        self.screen_h = screen_regions["screen_bot_right"][1] - self.screen_y
-        self.screen_xywh = (self.screen_x, self.screen_y, self.screen_w, self.screen_h)
-        self.map_x, self.map_y = screen_regions[f"map_top_left_{player}"]
-        self.map_w = screen_regions[f"map_bot_right_{player}"][0] - self.map_x
-        self.map_h = screen_regions[f"map_bot_right_{player}"][1] - self.map_y
-        self.minimap_xywh = (self.map_x, self.map_y, self.map_w, self.map_h)
-        # Only player 1 gets a next-round button; other players store None.
-        self.next_round_button = screen_regions["next_round_button"] if player==1 else None
-        self.confirm_button = screen_regions[f"confirm_button_{player}"]
-        self.kodiak_x, self.kodiak_y = screen_regions[f"kodiak_{player}"]
-        self.hobart_x, self.hobart_y = screen_regions[f"hobart_{player}"]
-        # Reference points to calibrate the minimap every time
-        self.kodiak_lat, self.kodiak_lon = (57.7916, -152.4083)
-        self.hobart_lat, self.hobart_lon = (-42.8833, 147.3355)
-        self.model = model(model=model_name)
-    @staticmethod
-    def pil_to_base64(image: Image) -> str:
-        """Encode ``image`` as PNG and return the bytes as a base64 string."""
-        buffered = BytesIO()
-        image.save(buffered, format="PNG")
-        img_base64_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
-        return img_base64_str
-    @classmethod
-    def create_message(cls, images_data: List[str]) -> HumanMessage:
-        """Build one human message: the prompt text followed by each base64 PNG as a data URL."""
-        message = HumanMessage(
-            content=[
-                {
-                    "type": "text",
-                    "text": cls.prompt_instructions,
-                },
-            ] + [
-                {
-                    "type": "image_url",
-                    "image_url": {"url": f"data:image/png;base64,{img_data}"},
-                }
-            for img_data in images_data],
-        )
-        return message
-    def extract_location_from_response(self, response: BaseMessage) -> Tuple[float, float]:
-        """Parse the last line of the reply as ``Lat: .., Lon: ..`` and map it to minimap pixels.
-        Returns the ``(x, y)`` pixel pair clamped to the minimap rectangle, or
-        ``None`` when the reply is empty or cannot be parsed.
-        """
-        try:
-            response = response.content.split("\n")
-            # NOTE(review): because of the `and`, this only strips empty trailing
-            # lines; a non-empty last line without "lat" is still used as the
-            # prediction -- `or` may have been intended, confirm.
-            while response and len(response[-1]) == 0 and "lat" not in response[-1].lower():
-                response.pop()
-            if response:
-                prediction = response[-1]
-            else:
-                return None
-            print(f"\n-------\n{self.model} Prediction:\n", prediction)
-            # Lat: 57.7916, Lon: -152.4083
-            lat = float(prediction.split(",")[0].split(":")[1])
-            lon = float(prediction.split(",")[1].split(":")[1])
-            x, y = self.lat_lon_to_mercator_map_pixels(lat, lon)
-            print(f"Normalized pixel coordinates: ({x}, {y})")
-            # Clamp the click position to the minimap rectangle.
-            if x < self.map_x:
-                x = self.map_x
-                print("x out of bounds")
-            elif x > self.map_x+self.map_w:
-                x = self.map_x+self.map_w
-                print("x out of bounds")
-            if y < self.map_y:
-                y = self.map_y
-                print("y out of bounds")
-            elif y > self.map_y+self.map_h:
-                y = self.map_y+self.map_h
-                print("y out of bounds")
-            return x, y
-        except Exception as e:
-            print("Error:", e)
-            return None
-    @staticmethod
-    def lat_to_mercator_y(lat: float) -> float:
-        """Return the Mercator y value ``ln(tan(pi/4 + lat/2))`` for ``lat`` in degrees."""
-        return math.log(math.tan(math.pi / 4 + math.radians(lat) / 2))
-    def lat_lon_to_mercator_map_pixels(self, lat: float, lon: float) -> Tuple[int, int]:
-        """
-        Convert latitude and longitude to pixel coordinates on the mercator projection minimap,
-        taking two known points 1 and 2 as a reference.
-        Args:
-            lat (float): Latitude (Decimal Degrees) of the point to convert.
-            lon (float): Longitude (Decimal Degrees) of the point to convert.
-        Returns:
-            tuple: x, y pixel coordinates of the point.
-        """
-        # Calculate the x pixel coordinate
-        # (linear interpolation in longitude between the two reference points)
-        lon_diff_ref = (self.kodiak_lon - self.hobart_lon)
-        lon_diff = (self.kodiak_lon - lon)
-        x = abs(self.kodiak_x - self.hobart_x) * (lon_diff / lon_diff_ref) + self.kodiak_x
-        # Convert latitude and longitude to mercator projection y coordinates
-        mercator_y1 = self.lat_to_mercator_y(self.kodiak_lat)
-        mercator_y2 = self.lat_to_mercator_y(self.hobart_lat)
-        mercator_y = self.lat_to_mercator_y(lat)
-        # Calculate the y pixel coordinate
-        # (linear interpolation in Mercator y between the two reference points)
-        lat_diff_ref = (mercator_y1 - mercator_y2)
-        lat_diff = (mercator_y1 - mercator_y)
-        y = abs(self.kodiak_y - self.hobart_y) * (lat_diff / lat_diff_ref) + self.kodiak_y
-        return round(x), round(y)
-    def select_map_location(self, x: int, y: int, plot: bool = False) -> None:
-        """Move the mouse to the minimap, click ``(x, y)`` and press the confirm button.
-        When ``plot`` is true, a minimap plot is saved between the click and the confirmation.
-        """
-        # Hovering over the minimap to expand it
-        pyautogui.moveTo(self.map_x+self.map_w-15, self.map_y+self.map_h-15, duration=0.5)
-        #bot.screen_w-50, bot.screen_h-80
-        # pyautogui.moveTo(self.screen_w-50, self.screen_h-80, duration=1.5)
-        # print(self.screen_w-50, self.screen_h-80)
-        print('finish moving')
-        sleep(0.5)
-        # Clicking on the predicted location
-        pyautogui.click(x, y, duration=0.5)
-        print('finish clicking')
-        sleep(0.5)
-        if plot:
-            self.plot_minimap(x, y)
-        # Confirming the guessed location
-        pyautogui.click(self.confirm_button, duration=0.2)
-        sleep(2)
-    def plot_minimap(self, x: int = None, y: int = None) -> None:
-        """Screenshot the minimap, mark both reference points and the optional guess, and save it.
-        The figure is written to ``plots/minimap.png``.
-        """
-        minimap = pyautogui.screenshot(region=self.minimap_xywh)
-        # Reference points are stored in screen pixels; shift them into minimap-local pixels.
-        plot_kodiak_x = self.kodiak_x - self.map_x
-        plot_kodiak_y = self.kodiak_y - self.map_y
-        plot_hobart_x = self.hobart_x - self.map_x
-        plot_hobart_y = self.hobart_y - self.map_y
-        plt.imshow(minimap)
-        plt.plot(plot_hobart_x, plot_hobart_y, 'ro')
-        plt.plot(plot_kodiak_x, plot_kodiak_y, 'ro')
-        # NOTE(review): truthiness test also skips a guess whose x or y is 0.
-        if x and y:
-            plt.plot(x-self.map_x, y-self.map_y, 'bo')
-        os.makedirs("plots", exist_ok=True)
-        plt.savefig("plots/minimap.png")
-        # plt.show()

main.py ADDED Viewed

	@@ -0,0 +1,255 @@

+#!/usr/bin/env python3
+"""
+Main entry point for MapCrunch geo-location testing
+Usage:
+    python main.py --mode data --samples 50 --urban --no-indoor   # Collect filtered data
+    python main.py --mode benchmark --models gpt-4o claude-3.5-sonnet  # Run benchmark
+    python main.py --mode interactive --model gpt-4o  # Interactive testing
+"""
+import argparse
+import os
+from time import sleep
+from typing import Dict
+from langchain_openai import ChatOpenAI
+from langchain_anthropic import ChatAnthropic
+from langchain_google_genai import ChatGoogleGenerativeAI
+from geo_bot import GeoBot
+from data_collector import DataCollector
+from benchmark import MapGuesserBenchmark
+from config import MODELS_CONFIG, SUCCESS_THRESHOLD_KM
+def interactive_mode(model_name: str = "gpt-4o", turns: int = 5, plot: bool = False):
+    """Interactive mode - play turns manually like the original"""
+    print(f"🎮 Starting interactive mode with {model_name}")
+    # Get model class
+    config = MODELS_CONFIG.get(model_name)
+    if not config:
+        print(f"❌ Unknown model: {model_name}")
+        return
+    model_class_name = config["class"]
+    model_class = globals()[model_class_name]
+    model_instance = config["model_name"]
+    # Create bot with Selenium integration
+    with GeoBot(model=model_class, model_name=model_instance, use_selenium=True) as bot:
+        # Setup clean environment
+        if bot.controller:
+            bot.controller.setup_clean_environment()
+        for turn in range(turns):
+            print(f"\n{'=' * 50}")
+            print(f"🎯 Turn {turn + 1}/{turns}")
+            print(f"{'=' * 50}")
+            try:
+                # Get new location (click Go button)
+                if bot.controller:
+                    if not bot.controller.click_go_button():
+                        print("❌ Failed to get new location")
+                        continue
+                else:
+                    print("⚠️  Manual mode: Please click Go button and press Enter")
+                    input()
+                # Take screenshot and analyze
+                screenshot = bot.take_screenshot()
+                location = bot.analyze_image(screenshot)
+                if location is not None:
+                    bot.select_map_location(*location, plot=plot)
+                    print("✅ Location selected successfully")
+                else:
+                    print("❌ Could not determine location")
+                    # Select a default location
+                    bot.select_map_location(
+                        x=bot.map_x + bot.map_w // 2,
+                        y=bot.map_y + bot.map_h // 2,
+                        plot=plot,
+                    )
+                # Brief pause between turns
+                sleep(2)
+            except KeyboardInterrupt:
+                print(f"\n⏹️  Game stopped by user after {turn + 1} turns")
+                break
+            except Exception as e:
+                print(f"❌ Error in turn {turn + 1}: {e}")
+                continue
+def data_collection_mode(
+    samples: int = 50, headless: bool = False, options: Dict = None
+):
+    """Data collection mode"""
+    print(f"📊 Starting data collection mode - {samples} samples")
+    if options:
+        print(f"🔧 Using custom options: {options}")
+    with DataCollector(headless=headless, options=options) as collector:
+        data = collector.collect_samples(samples)
+        print(f"✅ Collected {len(data)} samples successfully")
+def benchmark_mode(
+    models: list = None, samples: int = 10, live: bool = False, headless: bool = False
+):
+    """Benchmark mode"""
+    if models is None:
+        models = ["gpt-4o"]  # Default model
+    print(f"🏁 Starting benchmark mode")
+    print(f"   Models: {models}")
+    print(f"   Samples per model: {samples}")
+    print(f"   Mode: {'live' if live else 'offline'}")
+    benchmark = MapGuesserBenchmark(headless=headless)
+    try:
+        summary = benchmark.run_benchmark(
+            models=models, max_samples=samples, use_live_mode=live
+        )
+        print(f"\n🎉 Benchmark Complete!")
+        if summary:
+            print(f"\n📊 Results Summary:")
+            for model, stats in summary.items():
+                print(f"\n🤖 {model}:")
+                print(
+                    f"   Success Rate (under {SUCCESS_THRESHOLD_KM}km): {stats.get('success_rate', 0) * 100:.1f}%"
+                )
+                print(f"   📏 Average Distance: {stats['average_distance_km']:.1f} km")
+                print(f"   📊 Median Distance: {stats['median_distance_km']:.1f} km")
+                print(f"   🎯 Best: {stats['min_distance_km']:.1f} km")
+                print(f"   📈 Worst: {stats['max_distance_km']:.1f} km")
+    except Exception as e:
+        print(f"❌ Benchmark failed: {e}")
+def main():
+    parser = argparse.ArgumentParser(
+        description="MapCrunch Geo-Location AI Benchmark",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Collect training data with filters
+  python main.py --mode data --samples 100 --urban --no-indoor
+  # Collect from specific countries
+  python main.py --mode data --samples 50 --countries us gb jp --urban
+  # Run benchmark on saved data
+  python main.py --mode benchmark --models gpt-4o claude-3.5-sonnet --samples 20
+  # Interactive testing
+  python main.py --mode interactive --model gpt-4o --turns 5 --plot
+  # Live benchmark (uses MapCrunch website directly)
+  python main.py --mode benchmark --live --models gpt-4o
+        """,
+    )
+    parser.add_argument(
+        "--mode",
+        choices=["interactive", "data", "benchmark"],
+        default="interactive",
+        help="Operation mode",
+    )
+    # Interactive mode options
+    parser.add_argument(
+        "--model",
+        choices=list(MODELS_CONFIG.keys()),
+        default="gpt-4o",
+        help="Model for interactive mode",
+    )
+    parser.add_argument(
+        "--turns", type=int, default=5, help="Number of turns in interactive mode"
+    )
+    parser.add_argument(
+        "--plot", action="store_true", help="Generate plots of predictions"
+    )
+    # Data collection options
+    parser.add_argument(
+        "--samples", type=int, default=50, help="Number of samples to collect/test"
+    )
+    parser.add_argument(
+        "--urban", action="store_true", help="Collect only urban locations"
+    )
+    parser.add_argument("--no-indoor", action="store_true", help="Exclude indoor views")
+    parser.add_argument(
+        "--countries",
+        nargs="+",
+        help="Specific countries to collect from (e.g., us gb jp)",
+    )
+    # Benchmark options
+    parser.add_argument(
+        "--models",
+        nargs="+",
+        choices=list(MODELS_CONFIG.keys()),
+        help="Models to benchmark",
+    )
+    parser.add_argument(
+        "--live", action="store_true", help="Use live MapCrunch website for benchmark"
+    )
+    # General options
+    parser.add_argument(
+        "--headless", action="store_true", help="Run browser in headless mode"
+    )
+    args = parser.parse_args()
+    print(f"🚀 MapCrunch Geo-Location AI Benchmark")
+    print(f"   Mode: {args.mode}")
+    try:
+        if args.mode == "interactive":
+            interactive_mode(model_name=args.model, turns=args.turns, plot=args.plot)
+        elif args.mode == "data":
+            # Configure collection options from args
+            from config import MAPCRUNCH_OPTIONS
+            options = MAPCRUNCH_OPTIONS.copy()
+            if args.urban:
+                options["urban_only"] = True
+            if args.no_indoor:
+                options["exclude_indoor"] = True
+            if args.countries:
+                options["selected_countries"] = args.countries
+            data_collection_mode(
+                samples=args.samples, headless=args.headless, options=options
+            )
+        elif args.mode == "benchmark":
+            benchmark_mode(
+                models=args.models,
+                samples=args.samples,
+                live=args.live,
+                headless=args.headless,
+            )
+    except KeyboardInterrupt:
+        print(f"\n⏹️  Operation interrupted by user")
+    except Exception as e:
+        print(f"❌ Error: {e}")
+        raise
+if __name__ == "__main__":
+    main()

mapcrunch_controller.py ADDED Viewed

	@@ -0,0 +1,272 @@

+# mapcrunch_controller.py
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.common.action_chains import ActionChains
+from selenium.webdriver.chrome.options import Options
+import time
+from typing import Dict, Optional, Tuple
+from config import MAPCRUNCH_URL, SELECTORS, SELENIUM_CONFIG
+class MapCrunchController:
+    """Selenium controller for MapCrunch website automation"""
+    def __init__(self, headless: bool = False):
+        self.driver = None
+        self.wait = None
+        self.headless = headless
+        self.setup_driver()
+    def setup_driver(self):
+        """Initialize Chrome driver with appropriate settings"""
+        chrome_options = Options()
+        if self.headless:
+            chrome_options.add_argument("--headless")
+        chrome_options.add_argument(
+            f"--window-size={SELENIUM_CONFIG['window_size'][0]},{SELENIUM_CONFIG['window_size'][1]}"
+        )
+        chrome_options.add_argument("--disable-gpu")
+        chrome_options.add_argument("--no-sandbox")
+        chrome_options.add_argument("--disable-dev-shm-usage")
+        self.driver = webdriver.Chrome(options=chrome_options)
+        self.driver.set_window_size(*SELENIUM_CONFIG["window_size"])
+        self.wait = WebDriverWait(self.driver, SELENIUM_CONFIG["implicit_wait"])
+        self.driver.get(MAPCRUNCH_URL)
+        time.sleep(3)
+    def setup_clean_environment(self):
+        """Configure MapCrunch for clean benchmark environment"""
+        try:
+            assert self.driver is not None
+            self.driver.execute_script("""
+                const elementsToHide = ['#menu', '#info-box', '#social', '#bottom-box'];
+                elementsToHide.forEach(sel => {
+                    const el = document.querySelector(sel);
+                    if (el) el.style.display = 'none';
+                });
+            """)
+            print("✅ Environment configured for clean benchmark")
+        except Exception as e:
+            print(f"⚠️  Warning: Could not fully configure environment: {e}")
+    def setup_collection_options(self, options: Dict = None):
+        from config import MAPCRUNCH_OPTIONS
+        if options is None:
+            options = MAPCRUNCH_OPTIONS
+        try:
+            assert self.wait is not None
+            options_button = self.wait.until(
+                EC.element_to_be_clickable(
+                    (By.CSS_SELECTOR, SELECTORS["options_button"])
+                )
+            )
+            options_button.click()
+            time.sleep(1)
+            assert self.driver is not None
+            # Urban
+            urban_checkbox = self.driver.find_element(
+                By.CSS_SELECTOR, SELECTORS["urban_checkbox"]
+            )
+            if options.get("urban_only", False) != urban_checkbox.is_selected():
+                urban_checkbox.click()
+            # Indoor
+            indoor_checkbox = self.driver.find_element(
+                By.CSS_SELECTOR, SELECTORS["indoor_checkbox"]
+            )
+            if options.get("exclude_indoor", True) == indoor_checkbox.is_selected():
+                indoor_checkbox.click()
+            # Stealth
+            stealth_checkbox = self.driver.find_element(
+                By.CSS_SELECTOR, SELECTORS["stealth_checkbox"]
+            )
+            if options.get("stealth_mode", True) != stealth_checkbox.is_selected():
+                stealth_checkbox.click()
+            options_button.click()
+            time.sleep(0.5)
+            print("✅ Collection options configured")
+            return True
+        except Exception as e:
+            print(f"❌ Error configuring options: {e}")
+            return False
+    def _select_countries(self, country_codes: list):
+        """Select specific countries in the options panel"""
+        try:
+            # First, deselect all
+            assert self.driver is not None
+            all_countries = self.driver.find_elements(By.CSS_SELECTOR, "#countrylist a")
+            for country in all_countries:
+                class_attr = country.get_attribute("class")
+                if class_attr is not None and "hover" not in class_attr:
+                    country.click()
+                    time.sleep(0.1)
+            # Then select desired countries
+            for code in country_codes:
+                country = self.driver.find_element(
+                    By.CSS_SELECTOR, f'a[data-code="{code}"]'
+                )
+                class_attr = country.get_attribute("class")
+                if class_attr is not None and "hover" in class_attr:
+                    country.click()
+                    time.sleep(0.1)
+            print(f"✅ Selected countries: {country_codes}")
+        except Exception as e:
+            print(f"⚠️  Warning: Could not select countries: {e}")
+    def click_go_button(self) -> bool:
+        """Click the Go button to get new Street View location"""
+        try:
+            assert self.wait is not None
+            go_button = self.wait.until(
+                EC.element_to_be_clickable((By.CSS_SELECTOR, SELECTORS["go_button"]))
+            )
+            go_button.click()
+            # **重要**: 等待JS执行完毕并更新内容
+            time.sleep(DATA_COLLECTION_CONFIG.get("wait_after_go", 5))
+            return True
+        except Exception as e:
+            print(f"❌ Error clicking Go button: {e}")
+            return False
+    def get_current_address(self) -> Optional[str]:
+        """Extract current address/location name from the page"""
+        try:
+            assert self.wait is not None
+            address_element = self.wait.until(
+                EC.visibility_of_element_located(
+                    (By.CSS_SELECTOR, SELECTORS["address_element"])
+                )
+            )
+            address_text = address_element.text.strip()
+            address_title = address_element.get_attribute("title") or ""
+            return (
+                address_title
+                if len(address_title) > len(address_text)
+                else address_text
+            )
+        except Exception:
+            # 在stealth模式下，这个元素可能是隐藏的，所以找不到是正常的
+            return "Stealth Mode"
+    # **新增**: 重新加入 get_map_element_info 函数
+    def get_map_element_info(self) -> Dict:
+        """Get map element position and size for coordinate conversion."""
+        try:
+            assert self.wait is not None
+            map_element = self.wait.until(
+                EC.presence_of_element_located(
+                    (By.CSS_SELECTOR, SELECTORS["map_container"])
+                )
+            )
+            rect = map_element.rect
+            location = map_element.location
+            return {
+                "x": location["x"],
+                "y": location["y"],
+                "width": rect["width"],
+                "height": rect["height"],
+                "element": map_element,
+            }
+        except Exception as e:
+            # 这个函数在benchmark中不是必须的，只是GeoBot初始化需要，可以优雅地失败
+            # print(f"⚠️ Could not get map element info: {e}")
+            return {}
+    def take_street_view_screenshot(self) -> Optional[bytes]:
+        """Take screenshot of the Street View area"""
+        try:
+            assert self.wait is not None
+            pano_element = self.wait.until(
+                EC.presence_of_element_located(
+                    (By.CSS_SELECTOR, SELECTORS["pano_container"])
+                )
+            )
+            return pano_element.screenshot_as_png
+        except Exception as e:
+            print(f"❌ Error taking screenshot: {e}")
+            return None
+    # **新增**: 获取实时页面标识符的方法
+    def get_live_location_identifiers(self) -> Dict:
+        """Executes JS to get the identifiers of the CURRENTLY displayed location."""
+        try:
+            assert self.driver is not None
+            # 调用网站自己的JS函数来获取实时链接
+            live_identifiers = self.driver.execute_script("""
+                try {
+                    return {
+                        permLink: getPermLink(), // 调用网站自己的函数
+                        panoId: window.panorama.getPano(),
+                        urlString: urlSlug() // 调用网站自己的函数
+                    };
+                } catch (e) {
+                    return { error: e.toString() };
+                }
+            """)
+            return live_identifiers
+        except Exception as e:
+            print(f"❌ Error getting live identifiers: {e}")
+            return {}
+    # **修改**: 增强 load_location_from_data
+    def load_location_from_data(self, location_data: Dict) -> bool:
+        """Load a specific location by navigating to its permanent link."""
+        try:
+            assert self.driver is not None
+            # **优先使用 perm_link 或 url (现在应该已经是正确的了)**
+            url_to_load = location_data.get("perm_link") or location_data.get("url")
+            if url_to_load and "/p/" in url_to_load:
+                print(f"✅ Loading location via perm_link: {url_to_load}")
+                self.driver.get(url_to_load)
+                time.sleep(3)  # 等待场景加载
+                return True
+            # **备用方案: 根据坐标和视角手动构建链接 (来自您建议的格式)**
+            lat = location_data.get("lat")
+            lng = location_data.get("lng")
+            if lat and lng:
+                # 尝试从 identifiers 中获取视角信息
+                pov = "232.46_-5_0"  # 默认视角
+                # 注意: 采集时也应该保存 pov 信息，此处为简化
+                url_slug = f"{lat}_{lng}_{pov}"
+                url_to_load = f"{MAPCRUNCH_URL}/p/{url_slug}"
+                print(f"✅ Loading location by constructing URL: {url_to_load}")
+                self.driver.get(url_to_load)
+                time.sleep(3)
+                return True
+            print(
+                "⚠️  No valid location identifier (perm_link, url, or coords) found in data."
+            )
+            return False
+        except Exception as e:
+            print(f"❌ Error loading location: {e}")
+            return False
+    def close(self):
+        if self.driver:
+            self.driver.quit()
+    def __enter__(self):
+        """Enter the `with` block and hand back this same object."""
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Leave the `with` block by calling `close()`.
+        The exception arguments are not inspected and nothing is returned, so
+        an exception raised inside the `with` body is not suppressed.
+        """
+        self.close()

pyproject.toml ADDED Viewed

	@@ -0,0 +1,122 @@

+[project]
+name = "simple-g-ai-bot"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.11"
+dependencies = [
+    "aiohappyeyeballs==2.4.3",
+    "aiohttp==3.10.10",
+    "aiosignal==1.3.1",
+    "annotated-types==0.7.0",
+    "anthropic==0.36.2",
+    "anyio==4.6.2.post1",
+    "asttokens==2.4.1",
+    "attrs==24.2.0",
+    "cachetools==5.5.0",
+    "certifi==2024.8.30",
+    "charset-normalizer==3.4.0",
+    "colorama==0.4.6",
+    "comm==0.2.2",
+    "contourpy==1.3.0",
+    "cycler==0.12.1",
+    "debugpy==1.8.7",
+    "decorator==5.1.1",
+    "defusedxml==0.7.1",
+    "distro==1.9.0",
+    "executing==2.1.0",
+    "filelock==3.16.1",
+    "fonttools==4.54.1",
+    "frozenlist==1.4.1",
+    "fsspec==2024.9.0",
+    "google-ai-generativelanguage==0.6.10",
+    "google-api-core==2.21.0",
+    "google-api-python-client==2.149.0",
+    "google-auth==2.35.0",
+    "google-auth-httplib2==0.2.0",
+    "google-generativeai==0.8.3",
+    "googleapis-common-protos==1.65.0",
+    "greenlet==3.1.1",
+    "grpcio==1.67.0",
+    "grpcio-status==1.67.0",
+    "h11==0.14.0",
+    "httpcore==1.0.6",
+    "httplib2==0.22.0",
+    "httpx==0.27.2",
+    "huggingface-hub==0.26.0",
+    "idna==3.10",
+    "iprogress==0.4",
+    "ipykernel==6.29.5",
+    "ipython==8.28.0",
+    "jedi==0.19.1",
+    "jiter==0.6.1",
+    "jsonpatch==1.33",
+    "jsonpointer==3.0.0",
+    "jupyter-client==8.6.3",
+    "jupyter-core==5.7.2",
+    "kiwisolver==1.4.7",
+    "langchain==0.3.4",
+    "langchain-anthropic==0.2.3",
+    "langchain-core==0.3.12",
+    "langchain-google-genai==2.0.1",
+    "langchain-openai==0.2.3",
+    "langchain-text-splitters==0.3.0",
+    "langsmith==0.1.136",
+    "matplotlib==3.9.2",
+    "matplotlib-inline==0.1.7",
+    "mouseinfo==0.1.3",
+    "multidict==6.1.0",
+    "nest-asyncio==1.6.0",
+    "numpy==1.26.4",
+    "openai==1.52.0",
+    "opencv-python==4.10.0.84",
+    "orjson==3.10.9",
+    "packaging==24.1",
+    "parso==0.8.4",
+    "pillow==11.0.0",
+    "platformdirs==4.3.6",
+    "prompt-toolkit==3.0.48",
+    "propcache==0.2.0",
+    "proto-plus==1.24.0",
+    "protobuf==5.28.2",
+    "psutil==6.1.0",
+    "pure-eval==0.2.3",
+    "pyasn1==0.6.1",
+    "pyasn1-modules==0.4.1",
+    "pyautogui==0.9.54",
+    "pydantic==2.9.2",
+    "pydantic-core==2.23.4",
+    "pygetwindow==0.0.9",
+    "pygments==2.18.0",
+    "pymsgbox==1.0.9",
+    "pynput==1.7.7",
+    "pyparsing==3.2.0",
+    "pyperclip==1.9.0",
+    "pyrect==0.2.0",
+    "pyscreeze==1.0.1",
+    "python-dateutil==2.9.0.post0",
+    "python-dotenv==1.0.1",
+    "pytweening==1.2.0",
+    "pyyaml==6.0.2",
+    "pyzmq==26.2.0",
+    "regex==2024.9.11",
+    "requests==2.32.3",
+    "requests-toolbelt==1.0.0",
+    "rsa==4.9",
+    "selenium>=4.32.0",
+    "six==1.16.0",
+    "sniffio==1.3.1",
+    "sqlalchemy==2.0.36",
+    "stack-data==0.6.3",
+    "tenacity==9.0.0",
+    "tiktoken==0.8.0",
+    "tokenizers==0.20.1",
+    "tornado==6.4.1",
+    "tqdm==4.66.5",
+    "traitlets==5.14.3",
+    "typing-extensions==4.12.2",
+    "uritemplate==4.1.1",
+    "urllib3==2.2.3",
+    "wcwidth==0.2.13",
+    "yarl==1.15.5",
+]

readme.md CHANGED Viewed

@@ -1,6 +1,4 @@
-## GeoGuessr AI Bot
-In this project you will see how to program a Python AI Bot that uses PyAutoGUI, LangChain and Vision LLMs (GPT-4o, Gemini 1.5 and Claude 3.5) to automatically play to the GeoGuessr game.
-[YouTube Video](https://www.youtube.com/watch?v=OyDfr0xIhss)
-[Medium Blog](https://medium.com/@enricdomingo/coding-a-geoguessr-autonomous-ai-bot-with-vision-llms-gpt-4o-claude-3-5-and-gemini-1-5-908faf3bc3c7)


1	+ python main.py --mode data --samples 50 --urban --no-indoor
2

3
4	+ python main.py --mode benchmark --models gpt-4o

requirements.txt CHANGED Viewed

Binary files a/requirements.txt and b/requirements.txt differ

select_regions.py DELETED Viewed

@@ -1,66 +0,0 @@
-import pyautogui
-from pynput import keyboard
-import yaml
-regions = [
-    "screen_top_left",
-    "screen_bot_right",
-]
-map_regions = [
-    "map_top_left",
-    "map_bot_right",
-    "confirm_button",
-    "kodiak",
-    "hobart",
-]
-next_round_button = "next_round_button"
-coords = []
-PRESS_KEY = "a"
-def on_press(key):
-    try:
-        if key.char == PRESS_KEY:
-            x, y = pyautogui.position()
-            print(x, y)
-            coords.append([x, y])
-            return False
-    except AttributeError:
-        pass
-def get_coords(players=1):
-    for region in regions:
-        print(f"Move the mouse to the {region} region and press 'a'.")
-        with keyboard.Listener(on_press=on_press) as keyboard_listener:
-            keyboard_listener.join(timeout=40)
-    for p in range(1, players+1):
-        for region in map_regions:
-            region = region + f"_{p}"
-            regions.append(region)
-            print(f"Move the mouse to the {region} region and press 'a'.")
-            with keyboard.Listener(on_press=on_press) as keyboard_listener:
-                keyboard_listener.join(timeout=40)
-    regions.append(next_round_button)
-    print(f"Move the mouse to the {next_round_button} region and press 'a'.")
-    with keyboard.Listener(on_press=on_press) as keyboard_listener:
-        keyboard_listener.join(timeout=40)
-    screen_regions = {reg: coord for reg, coord in zip(regions, coords)}
-    # save dict as a yaml file
-    with open("screen_regions.yaml", "w") as f:
-        yaml.dump(screen_regions, f)
-    return screen_regions
-if __name__ == "__main__":
-    _ = get_coords(players=1)

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

view_data_collect.py ADDED Viewed

	@@ -0,0 +1,265 @@

+#!/usr/bin/env python3
+"""
+Quick viewer for collected MapCrunch data
+"""
+import json
+import os
+from pathlib import Path
+from PIL import Image
+import matplotlib.pyplot as plt
+import matplotlib.image as mpimg
+from collections import Counter
+def view_data_summary(data_file='data/golden_labels.json'):
+    """Display summary of collected data"""
+    try:
+        with open(data_file, 'r') as f:
+            data = json.load(f)
+    except FileNotFoundError:
+        print(f"❌ No data file found at {data_file}")
+        print("💡 Run data collection first: python main.py --mode data --samples 50")
+        return
+    samples = data.get('samples', [])
+    metadata = data.get('metadata', {})
+    print(f"📊 MapCrunch Data Collection Summary")
+    print(f"{'='*50}")
+    print(f"📅 Collection Date: {metadata.get('collection_date', 'Unknown')}")
+    print(f"📍 Total Samples: {len(samples)}")
+    print(f"🏙️  Collection Options: {metadata.get('collection_options', {})}")
+    # Statistics
+    stats = metadata.get('statistics', {})
+    if stats:
+        print(f"\n📈 Statistics:")
+        for key, value in stats.items():
+            print(f"   {key}: {value}")
+    # Country distribution
+    countries = []
+    for sample in samples:
+        address = sample.get('address', '')
+        if address and address != 'Unknown':
+            # Extract country (usually last part after comma)
+            country = address.split(', ')[-1].strip()
+            countries.append(country)
+    if countries:
+        country_counts = Counter(countries)
+        print(f"\n🌍 Top Countries:")
+        for country, count in country_counts.most_common(10):
+            print(f"   {country}: {count} samples")
+    # Coordinate coverage
+    coords_available = sum(1 for s in samples if s.get('lat') is not None)
+    print(f"\n📍 Coordinate Coverage: {coords_available}/{len(samples)} ({coords_available/len(samples)*100:.1f}%)")
+    # Thumbnail coverage
+    thumbnails_available = sum(1 for s in samples if s.get('has_thumbnail'))
+    print(f"📸 Thumbnail Coverage: {thumbnails_available}/{len(samples)} ({thumbnails_available/len(samples)*100:.1f}%)")
+    # Sample locations
+    print(f"\n📍 Sample Locations:")
+    for i, sample in enumerate(samples[:10]):
+        address = sample.get('address', 'Unknown')
+        lat = sample.get('lat', 'N/A')
+        lng = sample.get('lng', 'N/A')
+        has_thumb = "📸" if sample.get('has_thumbnail') else "❌"
+        print(f"   {i+1}. {has_thumb} {address} ({lat}, {lng})")
+    if len(samples) > 10:
+        print(f"   ... and {len(samples) - 10} more")
+def create_thumbnail_gallery(data_file='data/golden_labels.json', output_file='data/gallery.html', max_images=100):
+    """Create an HTML gallery of collected thumbnails"""
+    with open(data_file, 'r') as f:
+        data = json.load(f)
+    samples = data.get('samples', [])
+    html = """
+    <html>
+    <head>
+        <title>MapCrunch Collection Gallery</title>
+        <style>
+            body { font-family: Arial, sans-serif; background: #f0f0f0; }
+            h1 { text-align: center; }
+            .gallery { display: flex; flex-wrap: wrap; justify-content: center; }
+            .item {
+                margin: 10px;
+                background: white;
+                padding: 10px;
+                border-radius: 8px;
+                box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+                text-align: center;
+            }
+            .item img { max-width: 320px; border-radius: 4px; }
+            .address { font-weight: bold; margin: 5px 0; }
+            .coords { font-size: 0.9em; color: #666; }
+            .stats { margin: 20px; text-align: center; }
+        </style>
+    </head>
+    <body>
+        <h1>MapCrunch Collection Gallery</h1>
+    """
+    # Add statistics
+    total = len(samples)
+    with_thumb = sum(1 for s in samples if s.get('has_thumbnail'))
+    with_coords = sum(1 for s in samples if s.get('lat') is not None)
+    html += f"""
+        <div class="stats">
+            <p>Total Samples: {total} | With Thumbnails: {with_thumb} | With Coordinates: {with_coords}</p>
+        </div>
+        <div class="gallery">
+    """
+    # Add thumbnails
+    count = 0
+    for sample in samples:
+        if count >= max_images:
+            break
+        if sample.get('thumbnail_path'):
+            thumb_path = f"thumbnails/{sample['thumbnail_path']}"
+            address = sample.get('address', 'Unknown')
+            lat = sample.get('lat', 'N/A')
+            lng = sample.get('lng', 'N/A')
+            html += f"""
+            <div class="item">
+                <img src="{thumb_path}" alt="{address}">
+                <div class="address">{address}</div>
+                <div class="coords">{lat}, {lng}</div>
+            </div>
+            """
+            count += 1
+    html += """
+        </div>
+    </body>
+    </html>
+    """
+    with open(output_file, 'w') as f:
+        f.write(html)
+    print(f"✅ Gallery created: {output_file}")
+    print(f"📸 Included {count} images")
+    print(f"💡 Open in browser: file://{os.path.abspath(output_file)}")
+def plot_thumbnails_grid(data_file='data/golden_labels.json', max_images=20):
+    """Display a grid of thumbnails using matplotlib"""
+    with open(data_file, 'r') as f:
+        data = json.load(f)
+    samples = [s for s in data['samples'] if s.get('thumbnail_path')][:max_images]
+    if not samples:
+        print("❌ No samples with thumbnails found")
+        return
+    # Create grid
+    cols = 5
+    rows = (len(samples) + cols - 1) // cols
+    fig, axes = plt.subplots(rows, cols, figsize=(15, rows * 3))
+    if rows == 1:
+        axes = axes.reshape(1, -1)
+    for i, sample in enumerate(samples):
+        row = i // cols
+        col = i % cols
+        thumb_path = f"data/thumbnails/{sample['thumbnail_path']}"
+        if os.path.exists(thumb_path):
+            img = mpimg.imread(thumb_path)
+            axes[row, col].imshow(img)
+            axes[row, col].set_title(sample.get('address', 'Unknown')[:30] + '...', fontsize=8)
+        axes[row, col].axis('off')
+    # Hide empty subplots
+    for i in range(len(samples), rows * cols):
+        row = i // cols
+        col = i % cols
+        axes[row, col].axis('off')
+    plt.tight_layout()
+    plt.suptitle(f'MapCrunch Collection Sample ({len(samples)} locations)', y=1.02)
+    plt.show()
+def export_coordinates_csv(data_file='data/golden_labels.json', output_file='data/coordinates.csv'):
+    """Export coordinates to CSV for mapping"""
+    import csv
+    with open(data_file, 'r') as f:
+        data = json.load(f)
+    samples = data.get('samples', [])
+    with open(output_file, 'w', newline='', encoding='utf-8') as f:
+        writer = csv.writer(f)
+        writer.writerow(['id', 'address', 'latitude', 'longitude', 'has_thumbnail'])
+        count = 0
+        for sample in samples:
+            if sample.get('lat') is not None and sample.get('lng') is not None:
+                writer.writerow([
+                    sample['id'][:8],
+                    sample.get('address', 'Unknown'),
+                    sample['lat'],
+                    sample['lng'],
+                    'Yes' if sample.get('has_thumbnail') else 'No'
+                ])
+                count += 1
+    print(f"✅ Exported {count} coordinates to {output_file}")
+def main():
+    import argparse
+    parser = argparse.ArgumentParser(description='View collected MapCrunch data')
+    parser.add_argument('--gallery', action='store_true', help='Create HTML gallery')
+    parser.add_argument('--grid', action='store_true', help='Show thumbnail grid')
+    parser.add_argument('--csv', action='store_true', help='Export coordinates to CSV')
+    parser.add_argument('--data', default='data/golden_labels.json', help='Data file path')
+    parser.add_argument('--max-images', type=int, default=50, help='Max images for gallery/grid')
+    args = parser.parse_args()
+    if not os.path.exists(args.data):
+        print(f"❌ Data file not found: {args.data}")
+        print("💡 Run data collection first: python main.py --mode data --samples 50")
+        return
+    # Always show summary
+    view_data_summary(args.data)
+    # Additional actions
+    if args.gallery:
+        print()
+        create_thumbnail_gallery(args.data, max_images=args.max_images)
+    if args.grid:
+        print()
+        plot_thumbnails_grid(args.data, max_images=args.max_images)
+    if args.csv:
+        print()
+        export_coordinates_csv(args.data)
+if __name__ == "__main__":
+    main()