enricd committed on
Commit
54b4730
·
0 Parent(s):

push project to github

Browse files
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ venv/
2
+ .env
3
+ __pycache__
dev_geoguessr_bot.ipynb ADDED
File without changes
geoguessr_bot.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from io import BytesIO
2
+ import os
3
+ import dotenv
4
+ import base64
5
+ import pyautogui
6
+ import matplotlib.pyplot as plt
7
+ import math
8
+ from time import time, sleep
9
+ from typing import Tuple, List
10
+ from PIL import Image
11
+
12
+ from langchain_core.messages import HumanMessage, BaseMessage
13
+ from langchain_openai import ChatOpenAI
14
+ from langchain_anthropic import ChatAnthropic
15
+ from langchain_google_genai import ChatGoogleGenerativeAI
16
+
17
+ dotenv.load_dotenv()
18
+
19
+
20
+ PROMPT_INSTRUCTIONS = """
21
+ Try to predict where the image was taken.
22
+ First describe the relevant details in the image to do it.
23
+ List some regions and places where it could be.
24
+ Chose the most likely Country and City or Specific Location.
25
+ At the end, in the last line a part from the previous reasoning, write the Latitude and Longitude from that guessed location
26
+ using the following format, making sure that the coords are valid floats, without anything else and making sure to be consistent with the format:
27
+ Lat: XX.XXXX, Lon: XX.XXXX
28
+ """
29
+
30
+
31
class GeoBot:
    """Plays GeoGuessr by sending a screenshot to a multimodal LLM and
    clicking the predicted location on the in-game minimap.

    Latitude/longitude are converted to minimap pixels with a Mercator
    projection calibrated from two known reference points (Kodiak, Alaska
    and Hobart, Tasmania) whose on-screen pixel positions were captured
    by select_regions.get_coords().
    """

    prompt_instructions: str = PROMPT_INSTRUCTIONS

    def __init__(self, screen_regions, player=1, model=ChatOpenAI, model_name="gpt-4o"):
        """
        Args:
            screen_regions (dict): Mapping of landmark names to [x, y] pixel
                coordinates, as produced by select_regions.get_coords().
            player (int): Player slot; per-player region keys carry this suffix.
            model: LangChain chat-model class (ChatOpenAI, ChatAnthropic,
                ChatGoogleGenerativeAI).
            model_name (str): Model identifier passed to the model class.
        """
        self.player = player
        self.screen_regions = screen_regions
        # Full game-screen rectangle, used for the round screenshot.
        self.screen_x, self.screen_y = screen_regions["screen_top_left"]
        self.screen_w = screen_regions["screen_bot_right"][0] - self.screen_x
        self.screen_h = screen_regions["screen_bot_right"][1] - self.screen_y
        self.screen_xywh = (self.screen_x, self.screen_y, self.screen_w, self.screen_h)

        # Minimap rectangle for this player.
        self.map_x, self.map_y = screen_regions[f"map_top_left_{player}"]
        self.map_w = screen_regions[f"map_bot_right_{player}"][0] - self.map_x
        self.map_h = screen_regions[f"map_bot_right_{player}"][1] - self.map_y
        self.minimap_xywh = (self.map_x, self.map_y, self.map_w, self.map_h)

        # Only player 1 advances rounds; other players have no such button.
        self.next_round_button = screen_regions["next_round_button"] if player==1 else None
        self.confirm_button = screen_regions[f"confirm_button_{player}"]

        # Pixel positions of the two calibration cities on this minimap.
        self.kodiak_x, self.kodiak_y = screen_regions[f"kodiak_{player}"]
        self.hobart_x, self.hobart_y = screen_regions[f"hobart_{player}"]

        # Reference points (decimal degrees) to calibrate the minimap every time.
        self.kodiak_lat, self.kodiak_lon = (57.7916, -152.4083)
        self.hobart_lat, self.hobart_lon = (-42.8833, 147.3355)

        self.model = model(model=model_name)

    @staticmethod
    def pil_to_base64(image: Image.Image) -> str:
        """Encode a PIL image as a base64 PNG string (no data-URL prefix)."""
        buffered = BytesIO()
        image.save(buffered, format="PNG")
        img_base64_str = base64.b64encode(buffered.getvalue()).decode('utf-8')

        return img_base64_str

    @classmethod
    def create_message(cls, images_data: List[str]) -> HumanMessage:
        """Build a multimodal HumanMessage: the prompt text followed by one
        data-URL image part per base64 PNG string in images_data."""
        message = HumanMessage(
            content=[
                {
                    "type": "text",
                    "text": cls.prompt_instructions,
                },
            ] + [
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{img_data}"},
                }
                for img_data in images_data],
        )

        return message

    def extract_location_from_response(self, response: BaseMessage) -> Tuple[float, float]:
        """Parse the final 'Lat: X, Lon: Y' line of the model response and
        convert it to minimap pixel coordinates.

        Returns:
            (x, y) screen pixels clamped to the minimap rectangle, or
            None if no coordinates could be parsed.
        """
        try:
            lines = response.content.split("\n")
            # Fix: drop ALL trailing lines that don't mention "lat". The
            # previous condition (`len == 0 and "lat" not in ...`) only
            # skipped empty lines, so any trailing commentary after the
            # coords line made parsing fail.
            while lines and "lat" not in lines[-1].lower():
                lines.pop()
            if not lines:
                return None
            prediction = lines[-1]
            print(f"\n-------\n{self.model} Prediction:\n", prediction)

            # Expected format: Lat: 57.7916, Lon: -152.4083
            lat = float(prediction.split(",")[0].split(":")[1])
            lon = float(prediction.split(",")[1].split(":")[1])

            x, y = self.lat_lon_to_mercator_map_pixels(lat, lon)
            print(f"Normalized pixel coordinates: ({x}, {y})")

            # Clamp the click position to the minimap rectangle so a wild
            # guess can't click outside the map.
            clamped_x = min(max(x, self.map_x), self.map_x + self.map_w)
            if clamped_x != x:
                print("x out of bounds")
            clamped_y = min(max(y, self.map_y), self.map_y + self.map_h)
            if clamped_y != y:
                print("y out of bounds")

            return clamped_x, clamped_y

        except Exception as e:
            # Caller treats None as "retry once, then give up".
            print("Error:", e)
            return None

    @staticmethod
    def lat_to_mercator_y(lat: float) -> float:
        """Project a latitude (decimal degrees) to Mercator y (unitless)."""
        return math.log(math.tan(math.pi / 4 + math.radians(lat) / 2))

    def lat_lon_to_mercator_map_pixels(self, lat: float, lon: float) -> Tuple[int, int]:
        """
        Convert latitude and longitude to pixel coordinates on the mercator
        projection minimap, taking two known points 1 and 2 as a reference.

        Args:
            lat (float): Latitude (Decimal Degrees) of the point to convert.
            lon (float): Longitude (Decimal Degrees) of the point to convert.

        Returns:
            tuple: x, y pixel coordinates of the point.
        """

        # x: longitude is linear in Mercator, so interpolate between the two
        # reference longitudes and scale by their pixel distance.
        lon_diff_ref = (self.kodiak_lon - self.hobart_lon)
        lon_diff = (self.kodiak_lon - lon)

        x = abs(self.kodiak_x - self.hobart_x) * (lon_diff / lon_diff_ref) + self.kodiak_x

        # y: latitude must first be projected to Mercator space, then
        # interpolated the same way.
        mercator_y1 = self.lat_to_mercator_y(self.kodiak_lat)
        mercator_y2 = self.lat_to_mercator_y(self.hobart_lat)
        mercator_y = self.lat_to_mercator_y(lat)

        lat_diff_ref = (mercator_y1 - mercator_y2)
        lat_diff = (mercator_y1 - mercator_y)

        y = abs(self.kodiak_y - self.hobart_y) * (lat_diff / lat_diff_ref) + self.kodiak_y

        return round(x), round(y)

    def select_map_location(self, x: int, y: int, plot: bool = False) -> None:
        """Click (x, y) on the minimap and confirm the guess.

        Args:
            x, y: Screen pixel coordinates of the guess.
            plot: If True, save a debug plot of the minimap with the guess.
        """
        # Hovering over the minimap to expand it
        pyautogui.moveTo(self.map_x+self.map_w-15, self.map_y+self.map_h-15, duration=0.5)
        sleep(0.5)

        # Clicking on the predicted location
        pyautogui.click(x, y, duration=0.5)

        if plot:
            self.plot_minimap(x, y)

        # Confirming the guessed location
        pyautogui.click(self.confirm_button, duration=0.2)
        sleep(2)

    def plot_minimap(self, x: int = None, y: int = None) -> None:
        """Save a debug plot of the minimap with the calibration points (red)
        and, if given, the guessed location (blue) to plots/minimap.png."""
        # Fix: clear the implicit pyplot figure so markers from previous
        # rounds don't accumulate in the saved image.
        plt.clf()
        minimap = pyautogui.screenshot(region=self.minimap_xywh)
        # Reference points relative to the minimap's top-left corner.
        plot_kodiak_x = self.kodiak_x - self.map_x
        plot_kodiak_y = self.kodiak_y - self.map_y
        plot_hobart_x = self.hobart_x - self.map_x
        plot_hobart_y = self.hobart_y - self.map_y
        plt.imshow(minimap)
        plt.plot(plot_hobart_x, plot_hobart_y, 'ro')
        plt.plot(plot_kodiak_x, plot_kodiak_y, 'ro')
        # Fix: `if x and y:` was falsy for pixel coordinate 0.
        if x is not None and y is not None:
            plt.plot(x-self.map_x, y-self.map_y, 'bo')

        os.makedirs("plots", exist_ok=True)
        plt.savefig("plots/minimap.png")
        # plt.show()
main_single_player.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pyautogui
2
+ import yaml
3
+ import os
4
+ from time import sleep
5
+
6
+ from langchain_openai import ChatOpenAI
7
+ from langchain_anthropic import ChatAnthropic
8
+ from langchain_google_genai import ChatGoogleGenerativeAI
9
+
10
+ from select_regions import get_coords
11
+ from geoguessr_bot import GeoBot
12
+
13
+
14
def play_turn(bot: GeoBot, plot: bool = False):
    """Play one GeoGuessr round: screenshot, ask the model, click the guess.

    The model call is retried once if no 'Lat/Lon' line could be parsed
    from the response; if both attempts fail, clicks pixel (1, 1) so the
    round can still be submitted and the game continues.

    Args:
        bot: Configured GeoBot for the current player.
        plot: Forwarded to select_map_location for debug plotting.
    """
    screenshot = pyautogui.screenshot(region=bot.screen_xywh)
    screenshot_b64 = GeoBot.pil_to_base64(screenshot)
    message = GeoBot.create_message([screenshot_b64])

    # Up to two attempts at getting a parseable location (deduplicates the
    # previously copy-pasted invoke/parse retry code).
    location = None
    for _ in range(2):
        response = bot.model.invoke([message])
        print(response.content)
        location = bot.extract_location_from_response(response)
        if location is not None:
            break

    if location is not None:
        bot.select_map_location(*location, plot=plot)
    else:
        print("Error getting a location for second time")
        bot.select_map_location(x=1, y=1, plot=plot)

    # Going to the next round
    pyautogui.press(" ")
    sleep(2)
38
+
39
+
40
def main(turns=5, plot=False):
    """Run the bot for `turns` rounds of a single-player game.

    Loads screen regions from screen_regions.yaml, running the interactive
    calibration (get_coords) first if the file does not exist yet.

    Args:
        turns (int): Number of rounds to play.
        plot (bool): Save a debug minimap plot each round.
    """
    # Fix: use the dict returned by get_coords() directly instead of
    # discarding it and immediately re-reading the file it just wrote.
    if "screen_regions.yaml" not in os.listdir():
        screen_regions = get_coords(players=1)
    else:
        with open("screen_regions.yaml") as f:
            screen_regions = yaml.safe_load(f)

    bot = GeoBot(
        screen_regions=screen_regions,
        player=1,
        model=ChatOpenAI,  # ChatOpenAI, ChatGoogleGenerativeAI, ChatAnthropic
        model_name="gpt-4o",  # gpt-4o, gemini-1.5-pro, claude-3-5-sonnet-20240620
    )

    for turn in range(turns):
        print("\n----------------")
        print(f"Turn {turn+1}/{turns}")
        play_turn(bot=bot, plot=plot)
57
+
58
+
59
# Entry point: play 5 rounds with debug plots enabled.
if __name__ == "__main__":
    main(turns=5, plot=True)
readme.md ADDED
File without changes
requirements.txt ADDED
Binary file (4.43 kB). View file
 
screen_regions.yaml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ confirm_button_1:
2
+ - 2485
3
+ - 959
4
+ hobart_1:
5
+ - 2417
6
+ - 773
7
+ kodiak_1:
8
+ - 1883
9
+ - 565
10
+ map_bot_right_1:
11
+ - 2521
12
+ - 919
13
+ map_top_left_1:
14
+ - 1820
15
+ - 420
16
+ next_round_button:
17
+ - 2020
18
+ - 907
19
+ screen_bot_right:
20
+ - 2556
21
+ - 983
22
+ screen_top_left:
23
+ - 1480
24
+ - 87
select_regions.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pyautogui
2
+ from pynput import keyboard
3
+ import yaml
4
+
5
+
6
# Regions captured once per run: the game screen's corners.
regions = [
    "screen_top_left",
    "screen_bot_right",
]

# Per-player minimap landmarks; each name gets a "_{player}" suffix
# when captured in get_coords().
map_regions = [
    "map_top_left",
    "map_bot_right",
    "confirm_button",
    "kodiak",
    "hobart",
]

next_round_button = "next_round_button"

# Filled by the on_press keyboard callback, one [x, y] per captured point.
coords = []

# Key the user presses to capture the current mouse position.
PRESS_KEY = "a"
24
+
25
+
26
def on_press(key):
    """pynput callback: record the mouse position when PRESS_KEY is pressed.

    Returning False stops the active listener, so each Listener captures
    exactly one point. Special keys (which have no .char) are ignored.
    """
    try:
        pressed = key.char
    except AttributeError:
        # Special key (shift, ctrl, ...) — nothing to capture.
        return None
    if pressed == PRESS_KEY:
        position = pyautogui.position()
        print(position[0], position[1])
        coords.append([position[0], position[1]])
        return False
35
+
36
+
37
def get_coords(players=1):
    """Interactively capture screen-region coordinates.

    For each landmark, prompts the user to hover the mouse over it and press
    'a'; the pressed position is recorded by the on_press callback. The
    resulting mapping is saved to screen_regions.yaml and returned.

    Args:
        players (int): Number of players; the per-player map regions are
            captured once per player with a "_{p}" suffix.

    Returns:
        dict: region name -> [x, y] pixel coordinates.
    """
    # Fix: build a local list of region names instead of appending to the
    # module-level `regions` list, which made a second call accumulate
    # duplicate names. Also reset `coords` so repeated calls start clean.
    coords.clear()
    region_names = list(regions)

    for region in regions:
        print(f"Move the mouse to the {region} region and press 'a'.")
        with keyboard.Listener(on_press=on_press) as keyboard_listener:
            keyboard_listener.join(timeout=40)

    for p in range(1, players+1):
        for region in map_regions:
            region = region + f"_{p}"
            region_names.append(region)
            print(f"Move the mouse to the {region} region and press 'a'.")
            with keyboard.Listener(on_press=on_press) as keyboard_listener:
                keyboard_listener.join(timeout=40)

    region_names.append(next_round_button)
    print(f"Move the mouse to the {next_round_button} region and press 'a'.")
    with keyboard.Listener(on_press=on_press) as keyboard_listener:
        keyboard_listener.join(timeout=40)

    screen_regions = {reg: coord for reg, coord in zip(region_names, coords)}

    # Persist for future runs (main_single_player loads this file).
    with open("screen_regions.yaml", "w") as f:
        yaml.dump(screen_regions, f)

    return screen_regions
63
+
64
+
65
# Run standalone to (re)calibrate screen regions for a single player.
if __name__ == "__main__":
    _ = get_coords(players=1)