enricd committed on
Commit
54b4730
·
0 Parent(s):

push project to github

Browse files
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ venv/
2
+ .env
3
+ __pycache__
dev_geoguessr_bot.ipynb ADDED
File without changes
geoguessr_bot.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from io import BytesIO
2
+ import os
3
+ import dotenv
4
+ import base64
5
+ import pyautogui
6
+ import matplotlib.pyplot as plt
7
+ import math
8
+ from time import time, sleep
9
+ from typing import Tuple, List
10
+ from PIL import Image
11
+
12
+ from langchain_core.messages import HumanMessage, BaseMessage
13
+ from langchain_openai import ChatOpenAI
14
+ from langchain_anthropic import ChatAnthropic
15
+ from langchain_google_genai import ChatGoogleGenerativeAI
16
+
17
+ dotenv.load_dotenv()
18
+
19
+
20
+ PROMPT_INSTRUCTIONS = """
21
+ Try to predict where the image was taken.
22
+ First describe the relevant details in the image to do it.
23
+ List some regions and places where it could be.
24
+ Chose the most likely Country and City or Specific Location.
25
+ At the end, in the last line a part from the previous reasoning, write the Latitude and Longitude from that guessed location
26
+ using the following format, making sure that the coords are valid floats, without anything else and making sure to be consistent with the format:
27
+ Lat: XX.XXXX, Lon: XX.XXXX
28
+ """
29
+
30
+
31
class GeoBot:
    """Plays GeoGuessr by sending a screenshot to a multimodal LLM and
    clicking the predicted location on the in-game minimap.

    Latitude/longitude are converted to minimap pixels with a Mercator
    projection calibrated from two known reference points (Kodiak, Alaska
    and Hobart, Tasmania) whose on-screen pixel positions were captured
    by select_regions.get_coords().
    """

    prompt_instructions: str = PROMPT_INSTRUCTIONS

    def __init__(self, screen_regions, player=1, model=ChatOpenAI, model_name="gpt-4o"):
        """
        Args:
            screen_regions (dict): Mapping of landmark names to [x, y] pixel
                coordinates, as produced by select_regions.get_coords().
            player (int): Player slot; per-player region keys carry this suffix.
            model: LangChain chat-model class (ChatOpenAI, ChatAnthropic,
                ChatGoogleGenerativeAI).
            model_name (str): Model identifier passed to the model class.
        """
        self.player = player
        self.screen_regions = screen_regions
        # Full game-screen rectangle, used for the round screenshot.
        self.screen_x, self.screen_y = screen_regions["screen_top_left"]
        self.screen_w = screen_regions["screen_bot_right"][0] - self.screen_x
        self.screen_h = screen_regions["screen_bot_right"][1] - self.screen_y
        self.screen_xywh = (self.screen_x, self.screen_y, self.screen_w, self.screen_h)

        # Minimap rectangle for this player.
        self.map_x, self.map_y = screen_regions[f"map_top_left_{player}"]
        self.map_w = screen_regions[f"map_bot_right_{player}"][0] - self.map_x
        self.map_h = screen_regions[f"map_bot_right_{player}"][1] - self.map_y
        self.minimap_xywh = (self.map_x, self.map_y, self.map_w, self.map_h)

        # Only player 1 advances rounds; other players have no such button.
        self.next_round_button = screen_regions["next_round_button"] if player==1 else None
        self.confirm_button = screen_regions[f"confirm_button_{player}"]

        # Pixel positions of the two calibration cities on this minimap.
        self.kodiak_x, self.kodiak_y = screen_regions[f"kodiak_{player}"]
        self.hobart_x, self.hobart_y = screen_regions[f"hobart_{player}"]

        # Reference points (decimal degrees) to calibrate the minimap every time.
        self.kodiak_lat, self.kodiak_lon = (57.7916, -152.4083)
        self.hobart_lat, self.hobart_lon = (-42.8833, 147.3355)

        self.model = model(model=model_name)

    @staticmethod
    def pil_to_base64(image: Image.Image) -> str:
        """Encode a PIL image as a base64 PNG string (no data-URL prefix)."""
        buffered = BytesIO()
        image.save(buffered, format="PNG")
        img_base64_str = base64.b64encode(buffered.getvalue()).decode('utf-8')

        return img_base64_str

    @classmethod
    def create_message(cls, images_data: List[str]) -> HumanMessage:
        """Build a multimodal HumanMessage: the prompt text followed by one
        data-URL image part per base64 PNG string in images_data."""
        message = HumanMessage(
            content=[
                {
                    "type": "text",
                    "text": cls.prompt_instructions,
                },
            ] + [
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{img_data}"},
                }
                for img_data in images_data],
        )

        return message

    def extract_location_from_response(self, response: BaseMessage) -> Tuple[float, float]:
        """Parse the final 'Lat: X, Lon: Y' line of the model response and
        convert it to minimap pixel coordinates.

        Returns:
            (x, y) screen pixels clamped to the minimap rectangle, or
            None if no coordinates could be parsed.
        """
        try:
            lines = response.content.split("\n")
            # Fix: drop ALL trailing lines that don't mention "lat". The
            # previous condition (`len == 0 and "lat" not in ...`) only
            # skipped empty lines, so any trailing commentary after the
            # coords line made parsing fail.
            while lines and "lat" not in lines[-1].lower():
                lines.pop()
            if not lines:
                return None
            prediction = lines[-1]
            print(f"\n-------\n{self.model} Prediction:\n", prediction)

            # Expected format: Lat: 57.7916, Lon: -152.4083
            lat = float(prediction.split(",")[0].split(":")[1])
            lon = float(prediction.split(",")[1].split(":")[1])

            x, y = self.lat_lon_to_mercator_map_pixels(lat, lon)
            print(f"Normalized pixel coordinates: ({x}, {y})")

            # Clamp the click position to the minimap rectangle so a wild
            # guess can't click outside the map.
            clamped_x = min(max(x, self.map_x), self.map_x + self.map_w)
            if clamped_x != x:
                print("x out of bounds")
            clamped_y = min(max(y, self.map_y), self.map_y + self.map_h)
            if clamped_y != y:
                print("y out of bounds")

            return clamped_x, clamped_y

        except Exception as e:
            # Caller treats None as "retry once, then give up".
            print("Error:", e)
            return None

    @staticmethod
    def lat_to_mercator_y(lat: float) -> float:
        """Project a latitude (decimal degrees) to Mercator y (unitless)."""
        return math.log(math.tan(math.pi / 4 + math.radians(lat) / 2))

    def lat_lon_to_mercator_map_pixels(self, lat: float, lon: float) -> Tuple[int, int]:
        """
        Convert latitude and longitude to pixel coordinates on the mercator
        projection minimap, taking two known points 1 and 2 as a reference.

        Args:
            lat (float): Latitude (Decimal Degrees) of the point to convert.
            lon (float): Longitude (Decimal Degrees) of the point to convert.

        Returns:
            tuple: x, y pixel coordinates of the point.
        """

        # x: longitude is linear in Mercator, so interpolate between the two
        # reference longitudes and scale by their pixel distance.
        lon_diff_ref = (self.kodiak_lon - self.hobart_lon)
        lon_diff = (self.kodiak_lon - lon)

        x = abs(self.kodiak_x - self.hobart_x) * (lon_diff / lon_diff_ref) + self.kodiak_x

        # y: latitude must first be projected to Mercator space, then
        # interpolated the same way.
        mercator_y1 = self.lat_to_mercator_y(self.kodiak_lat)
        mercator_y2 = self.lat_to_mercator_y(self.hobart_lat)
        mercator_y = self.lat_to_mercator_y(lat)

        lat_diff_ref = (mercator_y1 - mercator_y2)
        lat_diff = (mercator_y1 - mercator_y)

        y = abs(self.kodiak_y - self.hobart_y) * (lat_diff / lat_diff_ref) + self.kodiak_y

        return round(x), round(y)

    def select_map_location(self, x: int, y: int, plot: bool = False) -> None:
        """Click (x, y) on the minimap and confirm the guess.

        Args:
            x, y: Screen pixel coordinates of the guess.
            plot: If True, save a debug plot of the minimap with the guess.
        """
        # Hovering over the minimap to expand it
        pyautogui.moveTo(self.map_x+self.map_w-15, self.map_y+self.map_h-15, duration=0.5)
        sleep(0.5)

        # Clicking on the predicted location
        pyautogui.click(x, y, duration=0.5)

        if plot:
            self.plot_minimap(x, y)

        # Confirming the guessed location
        pyautogui.click(self.confirm_button, duration=0.2)
        sleep(2)

    def plot_minimap(self, x: int = None, y: int = None) -> None:
        """Save a debug plot of the minimap with the calibration points (red)
        and, if given, the guessed location (blue) to plots/minimap.png."""
        # Fix: clear the implicit pyplot figure so markers from previous
        # rounds don't accumulate in the saved image.
        plt.clf()
        minimap = pyautogui.screenshot(region=self.minimap_xywh)
        # Reference points relative to the minimap's top-left corner.
        plot_kodiak_x = self.kodiak_x - self.map_x
        plot_kodiak_y = self.kodiak_y - self.map_y
        plot_hobart_x = self.hobart_x - self.map_x
        plot_hobart_y = self.hobart_y - self.map_y
        plt.imshow(minimap)
        plt.plot(plot_hobart_x, plot_hobart_y, 'ro')
        plt.plot(plot_kodiak_x, plot_kodiak_y, 'ro')
        # Fix: `if x and y:` was falsy for pixel coordinate 0.
        if x is not None and y is not None:
            plt.plot(x-self.map_x, y-self.map_y, 'bo')

        os.makedirs("plots", exist_ok=True)
        plt.savefig("plots/minimap.png")
        # plt.show()
main_single_player.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pyautogui
2
+ import yaml
3
+ import os
4
+ from time import sleep
5
+
6
+ from langchain_openai import ChatOpenAI
7
+ from langchain_anthropic import ChatAnthropic
8
+ from langchain_google_genai import ChatGoogleGenerativeAI
9
+
10
+ from select_regions import get_coords
11
+ from geoguessr_bot import GeoBot
12
+
13
+
14
def play_turn(bot: GeoBot, plot: bool = False):
    """Play one GeoGuessr round: screenshot, ask the model, click the guess.

    The model call is retried once if no 'Lat/Lon' line could be parsed
    from the response; if both attempts fail, clicks pixel (1, 1) so the
    round can still be submitted and the game continues.

    Args:
        bot: Configured GeoBot for the current player.
        plot: Forwarded to select_map_location for debug plotting.
    """
    screenshot = pyautogui.screenshot(region=bot.screen_xywh)
    screenshot_b64 = GeoBot.pil_to_base64(screenshot)
    message = GeoBot.create_message([screenshot_b64])

    # Up to two attempts at getting a parseable location (deduplicates the
    # previously copy-pasted invoke/parse retry code).
    location = None
    for _ in range(2):
        response = bot.model.invoke([message])
        print(response.content)
        location = bot.extract_location_from_response(response)
        if location is not None:
            break

    if location is not None:
        bot.select_map_location(*location, plot=plot)
    else:
        print("Error getting a location for second time")
        bot.select_map_location(x=1, y=1, plot=plot)

    # Going to the next round
    pyautogui.press(" ")
    sleep(2)
38
+
39
+
40
def main(turns=5, plot=False):
    """Run the bot for `turns` rounds of a single-player game.

    Loads screen regions from screen_regions.yaml, running the interactive
    calibration (get_coords) first if the file does not exist yet.

    Args:
        turns (int): Number of rounds to play.
        plot (bool): Save a debug minimap plot each round.
    """
    # Fix: use the dict returned by get_coords() directly instead of
    # discarding it and immediately re-reading the file it just wrote.
    if "screen_regions.yaml" not in os.listdir():
        screen_regions = get_coords(players=1)
    else:
        with open("screen_regions.yaml") as f:
            screen_regions = yaml.safe_load(f)

    bot = GeoBot(
        screen_regions=screen_regions,
        player=1,
        model=ChatOpenAI,  # ChatOpenAI, ChatGoogleGenerativeAI, ChatAnthropic
        model_name="gpt-4o",  # gpt-4o, gemini-1.5-pro, claude-3-5-sonnet-20240620
    )

    for turn in range(turns):
        print("\n----------------")
        print(f"Turn {turn+1}/{turns}")
        play_turn(bot=bot, plot=plot)
57
+
58
+
59
# Entry point: play 5 rounds with debug plots enabled.
if __name__ == "__main__":
    main(turns=5, plot=True)
readme.md ADDED
File without changes
requirements.txt ADDED
Binary file (4.43 kB). View file
 
screen_regions.yaml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ confirm_button_1:
2
+ - 2485
3
+ - 959
4
+ hobart_1:
5
+ - 2417
6
+ - 773
7
+ kodiak_1:
8
+ - 1883
9
+ - 565
10
+ map_bot_right_1:
11
+ - 2521
12
+ - 919
13
+ map_top_left_1:
14
+ - 1820
15
+ - 420
16
+ next_round_button:
17
+ - 2020
18
+ - 907
19
+ screen_bot_right:
20
+ - 2556
21
+ - 983
22
+ screen_top_left:
23
+ - 1480
24
+ - 87
select_regions.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pyautogui
2
+ from pynput import keyboard
3
+ import yaml
4
+
5
+
6
# Regions captured once per run: the game screen's corners.
regions = [
    "screen_top_left",
    "screen_bot_right",
]

# Per-player minimap landmarks; each name gets a "_{player}" suffix
# when captured in get_coords().
map_regions = [
    "map_top_left",
    "map_bot_right",
    "confirm_button",
    "kodiak",
    "hobart",
]

next_round_button = "next_round_button"

# Filled by the on_press keyboard callback, one [x, y] per captured point.
coords = []

# Key the user presses to capture the current mouse position.
PRESS_KEY = "a"
24
+
25
+
26
def on_press(key):
    """pynput callback: record the mouse position when PRESS_KEY is pressed.

    Returning False stops the active listener, so each Listener captures
    exactly one point. Special keys (which have no .char) are ignored.
    """
    try:
        pressed = key.char
    except AttributeError:
        # Special key (shift, ctrl, ...) — nothing to capture.
        return None
    if pressed == PRESS_KEY:
        position = pyautogui.position()
        print(position[0], position[1])
        coords.append([position[0], position[1]])
        return False
35
+
36
+
37
def get_coords(players=1):
    """Interactively capture screen-region coordinates.

    For each landmark, prompts the user to hover the mouse over it and press
    'a'; the pressed position is recorded by the on_press callback. The
    resulting mapping is saved to screen_regions.yaml and returned.

    Args:
        players (int): Number of players; the per-player map regions are
            captured once per player with a "_{p}" suffix.

    Returns:
        dict: region name -> [x, y] pixel coordinates.
    """
    # Fix: build a local list of region names instead of appending to the
    # module-level `regions` list, which made a second call accumulate
    # duplicate names. Also reset `coords` so repeated calls start clean.
    coords.clear()
    region_names = list(regions)

    for region in regions:
        print(f"Move the mouse to the {region} region and press 'a'.")
        with keyboard.Listener(on_press=on_press) as keyboard_listener:
            keyboard_listener.join(timeout=40)

    for p in range(1, players+1):
        for region in map_regions:
            region = region + f"_{p}"
            region_names.append(region)
            print(f"Move the mouse to the {region} region and press 'a'.")
            with keyboard.Listener(on_press=on_press) as keyboard_listener:
                keyboard_listener.join(timeout=40)

    region_names.append(next_round_button)
    print(f"Move the mouse to the {next_round_button} region and press 'a'.")
    with keyboard.Listener(on_press=on_press) as keyboard_listener:
        keyboard_listener.join(timeout=40)

    screen_regions = {reg: coord for reg, coord in zip(region_names, coords)}

    # Persist for future runs (main_single_player loads this file).
    with open("screen_regions.yaml", "w") as f:
        yaml.dump(screen_regions, f)

    return screen_regions
63
+
64
+
65
# Run standalone to (re)calibrate screen regions for a single player.
if __name__ == "__main__":
    _ = get_coords(players=1)