Spaces:
Building
Building
Commit
·
54b4730
0
Parent(s):
push project to github
Browse files- .gitignore +3 -0
- dev_geoguessr_bot.ipynb +0 -0
- geoguessr_bot.py +194 -0
- main_single_player.py +60 -0
- readme.md +0 -0
- requirements.txt +0 -0
- screen_regions.yaml +24 -0
- select_regions.py +66 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
venv/
|
2 |
+
.env
|
3 |
+
__pycache__
|
dev_geoguessr_bot.ipynb
ADDED
File without changes
|
geoguessr_bot.py
ADDED
@@ -0,0 +1,194 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from io import BytesIO
|
2 |
+
import os
|
3 |
+
import dotenv
|
4 |
+
import base64
|
5 |
+
import pyautogui
|
6 |
+
import matplotlib.pyplot as plt
|
7 |
+
import math
|
8 |
+
from time import time, sleep
|
9 |
+
from typing import Tuple, List
|
10 |
+
from PIL import Image
|
11 |
+
|
12 |
+
from langchain_core.messages import HumanMessage, BaseMessage
|
13 |
+
from langchain_openai import ChatOpenAI
|
14 |
+
from langchain_anthropic import ChatAnthropic
|
15 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
16 |
+
|
17 |
+
dotenv.load_dotenv()
|
18 |
+
|
19 |
+
|
20 |
+
# Instructions sent to the vision model together with the game screenshot.
# The model's reply must END with a line in the exact "Lat: ..., Lon: ..."
# format so GeoBot.extract_location_from_response() can parse it.
# (Fixed typos from the original prompt: "Chose" -> "Choose",
# "a part from" -> "apart from".)
PROMPT_INSTRUCTIONS = """
Try to predict where the image was taken.
First describe the relevant details in the image to do it.
List some regions and places where it could be.
Choose the most likely Country and City or Specific Location.
At the end, in the last line apart from the previous reasoning, write the Latitude and Longitude from that guessed location
using the following format, making sure that the coords are valid floats, without anything else and making sure to be consistent with the format:
Lat: XX.XXXX, Lon: XX.XXXX
"""
|
29 |
+
|
30 |
+
|
31 |
+
class GeoBot:
    """Automates GeoGuessr rounds.

    Sends a screenshot of the game to a vision LLM, parses the predicted
    latitude/longitude from the reply, converts them to on-screen pixel
    coordinates on the (mercator-projected) minimap, and clicks the guess.

    The minimap is calibrated from the pixel positions of two reference
    cities (Kodiak and Hobart), captured beforehand by select_regions.py.
    """

    # Prompt shared by all instances; the reply's last relevant line must
    # contain "Lat: <float>, Lon: <float>".
    prompt_instructions: str = PROMPT_INSTRUCTIONS

    def __init__(self, screen_regions, player=1, model=ChatOpenAI, model_name="gpt-4o"):
        """
        Args:
            screen_regions: dict of pixel coordinates (screen_regions.yaml).
            player: player slot (1 or 2); selects the "_<player>" region keys.
            model: LangChain chat-model class (ChatOpenAI, ChatAnthropic,
                ChatGoogleGenerativeAI).
            model_name: model identifier passed to that class.
        """
        self.player = player
        self.screen_regions = screen_regions

        # Full game-view rectangle as (x, y, w, h) for pyautogui.screenshot.
        self.screen_x, self.screen_y = screen_regions["screen_top_left"]
        self.screen_w = screen_regions["screen_bot_right"][0] - self.screen_x
        self.screen_h = screen_regions["screen_bot_right"][1] - self.screen_y
        self.screen_xywh = (self.screen_x, self.screen_y, self.screen_w, self.screen_h)

        # Minimap rectangle for this player.
        self.map_x, self.map_y = screen_regions[f"map_top_left_{player}"]
        self.map_w = screen_regions[f"map_bot_right_{player}"][0] - self.map_x
        self.map_h = screen_regions[f"map_bot_right_{player}"][1] - self.map_y
        self.minimap_xywh = (self.map_x, self.map_y, self.map_w, self.map_h)

        # Only player 1 is responsible for advancing to the next round.
        self.next_round_button = screen_regions["next_round_button"] if player == 1 else None
        self.confirm_button = screen_regions[f"confirm_button_{player}"]

        # Reference points to calibrate the minimap every time:
        # on-screen pixel positions (captured) and real-world coordinates.
        self.kodiak_x, self.kodiak_y = screen_regions[f"kodiak_{player}"]
        self.hobart_x, self.hobart_y = screen_regions[f"hobart_{player}"]
        self.kodiak_lat, self.kodiak_lon = (57.7916, -152.4083)
        self.hobart_lat, self.hobart_lon = (-42.8833, 147.3355)

        self.model = model(model=model_name)

    @staticmethod
    def pil_to_base64(image: Image.Image) -> str:
        """Encode a PIL image as a base64 PNG string (no data-URL prefix)."""
        buffered = BytesIO()
        image.save(buffered, format="PNG")
        img_base64_str = base64.b64encode(buffered.getvalue()).decode('utf-8')

        return img_base64_str

    @classmethod
    def create_message(cls, images_data: List[str]) -> HumanMessage:
        """Build a multimodal HumanMessage: the prompt text followed by one
        data-URL image part per base64-encoded PNG in `images_data`."""
        message = HumanMessage(
            content=[
                {
                    "type": "text",
                    "text": cls.prompt_instructions,
                },
            ] + [
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{img_data}"},
                }
                for img_data in images_data],
        )

        return message

    def extract_location_from_response(self, response: BaseMessage) -> Tuple[float, float]:
        """Parse the "Lat: .., Lon: .." line from the model reply.

        Returns:
            (x, y) screen-pixel coordinates clamped to the minimap
            rectangle, or None if the reply could not be parsed (the
            caller may retry on None).
        """
        try:
            lines = response.content.split("\n")
            # Drop trailing lines until we reach the coordinates line.
            # NOTE: the original condition (`len(...) == 0 and "lat" not in ...`)
            # made the "lat" check unreachable, so any trailing text after the
            # coordinates line broke parsing; now we search for the line itself.
            while lines and "lat" not in lines[-1].lower():
                lines.pop()
            if lines:
                prediction = lines[-1]
            else:
                return None
            print(f"\n-------\n{self.model} Prediction:\n", prediction)

            # Expected format: Lat: 57.7916, Lon: -152.4083
            lat = float(prediction.split(",")[0].split(":")[1])
            lon = float(prediction.split(",")[1].split(":")[1])

            x, y = self.lat_lon_to_mercator_map_pixels(lat, lon)
            print(f"Normalized pixel coordinates: ({x}, {y})")

            # Clamp so the click always lands inside the minimap.
            if x < self.map_x:
                x = self.map_x
                print("x out of bounds")
            elif x > self.map_x + self.map_w:
                x = self.map_x + self.map_w
                print("x out of bounds")
            if y < self.map_y:
                y = self.map_y
                print("y out of bounds")
            elif y > self.map_y + self.map_h:
                y = self.map_y + self.map_h
                print("y out of bounds")

            return x, y

        except Exception as e:
            # Best-effort: report and let the caller retry.
            print("Error:", e)
            return None

    @staticmethod
    def lat_to_mercator_y(lat: float) -> float:
        """Convert a latitude in decimal degrees to an (unscaled) mercator
        projection y coordinate."""
        return math.log(math.tan(math.pi / 4 + math.radians(lat) / 2))

    def lat_lon_to_mercator_map_pixels(self, lat: float, lon: float) -> Tuple[int, int]:
        """
        Convert latitude and longitude to pixel coordinates on the mercator
        projection minimap, taking two known points 1 and 2 as a reference.

        Args:
            lat (float): Latitude (Decimal Degrees) of the point to convert.
            lon (float): Longitude (Decimal Degrees) of the point to convert.

        Returns:
            tuple: x, y pixel coordinates of the point.
        """

        # Calculate the x pixel coordinate by linear interpolation between
        # the two reference longitudes (mercator x is linear in longitude).
        lon_diff_ref = (self.kodiak_lon - self.hobart_lon)
        lon_diff = (self.kodiak_lon - lon)

        x = abs(self.kodiak_x - self.hobart_x) * (lon_diff / lon_diff_ref) + self.kodiak_x

        # Convert latitude and longitude to mercator projection y coordinates
        mercator_y1 = self.lat_to_mercator_y(self.kodiak_lat)
        mercator_y2 = self.lat_to_mercator_y(self.hobart_lat)
        mercator_y = self.lat_to_mercator_y(lat)

        # Calculate the y pixel coordinate (linear in mercator y).
        lat_diff_ref = (mercator_y1 - mercator_y2)
        lat_diff = (mercator_y1 - mercator_y)

        y = abs(self.kodiak_y - self.hobart_y) * (lat_diff / lat_diff_ref) + self.kodiak_y

        return round(x), round(y)

    def select_map_location(self, x: int, y: int, plot: bool = False) -> None:
        """Click (x, y) on the expanded minimap and confirm the guess."""
        # Hovering over the minimap to expand it
        pyautogui.moveTo(self.map_x + self.map_w - 15, self.map_y + self.map_h - 15, duration=0.5)
        sleep(0.5)

        # Clicking on the predicted location
        pyautogui.click(x, y, duration=0.5)

        if plot:
            self.plot_minimap(x, y)

        # Confirming the guessed location
        pyautogui.click(self.confirm_button, duration=0.2)
        sleep(2)

    def plot_minimap(self, x: int = None, y: int = None) -> None:
        """Save a debug plot of the minimap to plots/minimap.png, marking
        the reference cities (red) and, if given, the guess (blue)."""
        minimap = pyautogui.screenshot(region=self.minimap_xywh)
        # Reference points relative to the minimap's top-left corner.
        plot_kodiak_x = self.kodiak_x - self.map_x
        plot_kodiak_y = self.kodiak_y - self.map_y
        plot_hobart_x = self.hobart_x - self.map_x
        plot_hobart_y = self.hobart_y - self.map_y
        # Fresh figure so markers don't accumulate across rounds.
        plt.figure()
        plt.imshow(minimap)
        plt.plot(plot_hobart_x, plot_hobart_y, 'ro')
        plt.plot(plot_kodiak_x, plot_kodiak_y, 'ro')
        # `is not None` instead of truthiness: 0 is a valid pixel coordinate.
        if x is not None and y is not None:
            plt.plot(x - self.map_x, y - self.map_y, 'bo')

        os.makedirs("plots", exist_ok=True)
        plt.savefig("plots/minimap.png")
        plt.close()
|
main_single_player.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pyautogui
|
2 |
+
import yaml
|
3 |
+
import os
|
4 |
+
from time import sleep
|
5 |
+
|
6 |
+
from langchain_openai import ChatOpenAI
|
7 |
+
from langchain_anthropic import ChatAnthropic
|
8 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
9 |
+
|
10 |
+
from select_regions import get_coords
|
11 |
+
from geoguessr_bot import GeoBot
|
12 |
+
|
13 |
+
|
14 |
+
def play_turn(bot: GeoBot, plot: bool = False):
    """Play one GeoGuessr round: screenshot the game, ask the model for a
    location (retrying once on a parse failure), click the guess on the
    minimap, and advance to the next round."""
    screenshot = pyautogui.screenshot(region=bot.screen_xywh)
    message = GeoBot.create_message([GeoBot.pil_to_base64(screenshot)])

    location = None
    for _attempt in range(2):  # one retry if the reply can't be parsed
        response = bot.model.invoke([message])
        print(response.content)
        location = bot.extract_location_from_response(response)
        if location is not None:
            break

    if location is not None:
        bot.select_map_location(*location, plot=plot)
    else:
        print("Error getting a location for second time")
        # Fallback guess so the round can still be confirmed.
        bot.select_map_location(x=1, y=1, plot=plot)

    # Going to the next round
    pyautogui.press(" ")
    sleep(2)
|
38 |
+
|
39 |
+
|
40 |
+
def main(turns=5, plot=False):
    """Calibrate (or load) the screen regions, build the bot, and play
    `turns` rounds.

    Args:
        turns: number of rounds to play.
        plot: if True, save a debug minimap plot each round.
    """
    # First run: interactively capture the regions and use the returned
    # dict directly (the original assigned it, then immediately overwrote
    # it by re-reading the file it had just written).
    if "screen_regions.yaml" not in os.listdir():
        screen_regions = get_coords(players=1)
    else:
        with open("screen_regions.yaml") as f:
            screen_regions = yaml.safe_load(f)

    bot = GeoBot(
        screen_regions=screen_regions,
        player=1,
        model=ChatOpenAI,  # ChatOpenAI, ChatGoogleGenerativeAI, ChatAnthropic
        model_name="gpt-4o",  # gpt-4o, gemini-1.5-pro, claude-3-5-sonnet-20240620
    )

    for turn in range(turns):
        print("\n----------------")
        print(f"Turn {turn+1}/{turns}")
        play_turn(bot=bot, plot=plot)


if __name__ == "__main__":
    main(turns=5, plot=True)
|
readme.md
ADDED
File without changes
|
requirements.txt
ADDED
Binary file (4.43 kB). View file
|
|
screen_regions.yaml
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
confirm_button_1:
|
2 |
+
- 2485
|
3 |
+
- 959
|
4 |
+
hobart_1:
|
5 |
+
- 2417
|
6 |
+
- 773
|
7 |
+
kodiak_1:
|
8 |
+
- 1883
|
9 |
+
- 565
|
10 |
+
map_bot_right_1:
|
11 |
+
- 2521
|
12 |
+
- 919
|
13 |
+
map_top_left_1:
|
14 |
+
- 1820
|
15 |
+
- 420
|
16 |
+
next_round_button:
|
17 |
+
- 2020
|
18 |
+
- 907
|
19 |
+
screen_bot_right:
|
20 |
+
- 2556
|
21 |
+
- 983
|
22 |
+
screen_top_left:
|
23 |
+
- 1480
|
24 |
+
- 87
|
select_regions.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pyautogui
|
2 |
+
from pynput import keyboard
|
3 |
+
import yaml
|
4 |
+
|
5 |
+
|
6 |
+
# Regions captured once, regardless of player count (game-view corners).
regions = [
    "screen_top_left",
    "screen_bot_right",
]

# Per-player regions; each name gets a "_<player>" suffix when captured.
map_regions = [
    "map_top_left",
    "map_bot_right",
    "confirm_button",
    "kodiak",
    "hobart",
]

# Captured last, once, for player 1 only.
next_round_button = "next_round_button"

# Mouse positions recorded by the keyboard listener, in capture order;
# zipped with the region names at the end of get_coords().
coords = []

# Key the user presses to record the current mouse position.
PRESS_KEY = "a"
|
24 |
+
|
25 |
+
|
26 |
+
def on_press(key):
    """pynput keyboard callback.

    When PRESS_KEY is pressed, append the current mouse position to the
    module-level `coords` list and return False to stop the listener.
    Special keys (which have no `.char`) are ignored.
    """
    char = getattr(key, "char", None)  # special keys expose no .char
    if char != PRESS_KEY:
        return
    x, y = pyautogui.position()
    print(x, y)
    coords.append([x, y])
    return False
|
35 |
+
|
36 |
+
|
37 |
+
def _capture_region(region):
    """Prompt the user, then block until PRESS_KEY records one position."""
    print(f"Move the mouse to the {region} region and press '{PRESS_KEY}'.")
    with keyboard.Listener(on_press=on_press) as keyboard_listener:
        keyboard_listener.join(timeout=40)


def get_coords(players=1):
    """Interactively capture all screen regions needed by the bot.

    Captures, in order: the screen corners, the per-player map regions,
    and the next-round button. Saves the result to screen_regions.yaml.

    Args:
        players: number of player slots to capture map regions for.

    Returns:
        dict mapping region name -> [x, y] pixel coordinates.
    """
    # Reset state from any previous call and work on a local copy so the
    # module-level lists aren't mutated (the original appended to them,
    # corrupting the name/coord zip on a second call).
    coords.clear()
    all_regions = list(regions)

    for region in all_regions:
        _capture_region(region)

    for p in range(1, players + 1):
        for region in map_regions:
            name = f"{region}_{p}"
            all_regions.append(name)
            _capture_region(name)

    all_regions.append(next_round_button)
    _capture_region(next_round_button)

    screen_regions = {reg: coord for reg, coord in zip(all_regions, coords)}

    # Persist the calibration so later runs can skip this step.
    with open("screen_regions.yaml", "w") as f:
        yaml.dump(screen_regions, f)

    return screen_regions
|
63 |
+
|
64 |
+
|
65 |
+
# Run the calibration standalone to (re)generate screen_regions.yaml.
if __name__ == "__main__":
    _ = get_coords(players=1)
|